From bc3c5abd64fd22a0623b7c0a1c0b55c84ea02160 Mon Sep 17 00:00:00 2001
From: Daniel Amar
Date: Mon, 5 May 2025 15:25:06 -0400
Subject: [PATCH 1/3] Add retry mechanism with exponential backoff for buildkit bootstrap

Signed-off-by: Daniel Amar
---
 src/main.ts | 70 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/src/main.ts b/src/main.ts
index 13f0261..f7c451b 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -17,6 +17,46 @@ import {ContextInfo} from '@docker/actions-toolkit/lib/types/docker/docker';
 import * as context from './context';
 import * as stateHelper from './state-helper';
 
+/**
+ * Retry a function with exponential backoff
+ */
+async function retryWithBackoff<T>(
+  operation: () => Promise<T>,
+  maxRetries: number = 3,
+  initialDelay: number = 1000,
+  maxDelay: number = 10000,
+  shouldRetry: (error: Error) => boolean = () => true
+): Promise<T> {
+  let retries = 0;
+  let delay = initialDelay;
+
+  while (true) {
+    try {
+      return await operation();
+    } catch (error) {
+      if (retries >= maxRetries || !shouldRetry(error)) {
+        throw error;
+      }
+
+      retries++;
+      core.info(`Retry ${retries}/${maxRetries} after ${delay}ms due to: ${error.message}`);
+      await new Promise(resolve => setTimeout(resolve, delay));
+
+      // Exponential backoff with jitter
+      delay = Math.min(delay * 2, maxDelay) * (0.8 + Math.random() * 0.4);
+    }
+  }
+}
+
+/**
+ * Check if an error is a buildkit socket connection error
+ */
+function isBuildkitSocketError(error: Error): boolean {
+  return error.message.includes('/run/buildkit/buildkitd.sock') ||
+         error.message.includes('failed to list workers') ||
+         error.message.includes('connection error');
+}
+
 actionsToolkit.run(
   // main
   async () => {
@@ -165,13 +205,29 @@ actionsToolkit.run(
 
     await core.group(`Booting builder`, async () => {
       const inspectCmd = await toolkit.buildx.getCommand(await context.getInspectArgs(inputs, toolkit));
-      await Exec.getExecOutput(inspectCmd.command, inspectCmd.args, {
-        ignoreReturnCode: true
-      }).then(res => {
-        if (res.stderr.length > 0 && res.exitCode != 0) {
-          throw new Error(res.stderr.match(/(.*)\s*$/)?.[0]?.trim() ?? 'unknown error');
-        }
-      });
+
+      try {
+        await retryWithBackoff(
+          async () => {
+            const res = await Exec.getExecOutput(inspectCmd.command, inspectCmd.args, {
+              ignoreReturnCode: true
+            });
+
+            if (res.stderr.length > 0 && res.exitCode != 0) {
+              throw new Error(res.stderr.match(/(.*)\s*$/)?.[0]?.trim() ?? 'unknown error');
+            }
+            return res;
+          },
+          5, // maxRetries - retry up to 5 times for buildkit initialization
+          1000, // initialDelay - start with 1 second
+          15000, // maxDelay - cap at 15 seconds
+          isBuildkitSocketError // only retry on buildkit socket errors
+        );
+      } catch (error) {
+        // Log the warning but continue - this matches current behavior where builds still succeed
+        core.warning(`Failed to bootstrap builder after multiple retries: ${error.message}`);
+        core.warning('Continuing execution as buildkit daemon may initialize later');
+      }
     });
 
     if (inputs.install) {

From 66d7daabf819d7c699230fe5ad0f1c4475cd54a5 Mon Sep 17 00:00:00 2001
From: Daniel Amar
Date: Mon, 5 May 2025 16:03:51 -0400
Subject: [PATCH 2/3] Add timeout control to prevent long waits during buildkit initialization

Signed-off-by: Daniel Amar
---
 src/main.ts | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/main.ts b/src/main.ts
index f7c451b..ce83109 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -209,19 +209,35 @@ actionsToolkit.run(
       try {
         await retryWithBackoff(
           async () => {
-            const res = await Exec.getExecOutput(inspectCmd.command, inspectCmd.args, {
-              ignoreReturnCode: true
+            // Create a promise that will timeout after 15 seconds
+            const timeoutPromise = new Promise((_, reject) => {
+              setTimeout(() => {
+                reject(new Error('Timeout exceeded while waiting for buildkit to initialize'));
+              }, 15000); // 15 second timeout
             });
 
-            if (res.stderr.length > 0 && res.exitCode != 0) {
-              throw new Error(res.stderr.match(/(.*)\s*$/)?.[0]?.trim() ?? 'unknown error');
-            }
-            return res;
+            // Create the actual command execution promise
+            const execPromise = Exec.getExecOutput(inspectCmd.command, inspectCmd.args, {
+              ignoreReturnCode: true
+            }).then(res => {
+              if (res.stderr.length > 0 && res.exitCode != 0) {
+                throw new Error(res.stderr.match(/(.*)\s*$/)?.[0]?.trim() ?? 'unknown error');
+              }
+              return res;
+            });
+
+            // Race the timeout against the actual command
+            // If the command takes too long, we'll get the timeout error instead
+            return Promise.race([execPromise, timeoutPromise]);
           },
           5, // maxRetries - retry up to 5 times for buildkit initialization
           1000, // initialDelay - start with 1 second
           15000, // maxDelay - cap at 15 seconds
-          isBuildkitSocketError // only retry on buildkit socket errors
+          (error) => {
+            // Retry on buildkit socket errors or timeouts
+            return isBuildkitSocketError(error) ||
+                   error.message.includes('Timeout exceeded while waiting for buildkit');
+          }
         );
       } catch (error) {
         // Log the warning but continue - this matches current behavior where builds still succeed

From 80d22c042f326cc531e8a9315edbcc1e82e0f8f8 Mon Sep 17 00:00:00 2001
From: Daniel Amar
Date: Tue, 27 May 2025 13:53:26 -0400
Subject: [PATCH 3/3] remove retry only on certain errors. maxRetries set to 3 instead of 5.

---
 src/main.ts | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/src/main.ts b/src/main.ts
index ce83109..f9cdeb5 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -48,15 +48,6 @@ async function retryWithBackoff<T>(
   }
 }
 
-/**
- * Check if an error is a buildkit socket connection error
- */
-function isBuildkitSocketError(error: Error): boolean {
-  return error.message.includes('/run/buildkit/buildkitd.sock') ||
-         error.message.includes('failed to list workers') ||
-         error.message.includes('connection error');
-}
-
 actionsToolkit.run(
   // main
   async () => {
@@ -230,14 +221,9 @@ actionsToolkit.run(
             // If the command takes too long, we'll get the timeout error instead
             return Promise.race([execPromise, timeoutPromise]);
           },
-          5, // maxRetries - retry up to 5 times for buildkit initialization
+          3, // maxRetries - retry up to 3 times for buildkit initialization
           1000, // initialDelay - start with 1 second
-          15000, // maxDelay - cap at 15 seconds
-          (error) => {
-            // Retry on buildkit socket errors or timeouts
-            return isBuildkitSocketError(error) ||
-                   error.message.includes('Timeout exceeded while waiting for buildkit');
-          }
+          15000 // maxDelay - cap at 15 seconds
         );
       } catch (error) {
         // Log the warning but continue - this matches current behavior where builds still succeed