Skip to content

Commit 72d9877

Browse files
authored
Fix node options and OOM retries (#1897)
* don't hardcode max old space size in deployment images * flags: treat underscores as hyphens * append attempt number to runner name if >1 * improve retry spans for oom errors
1 parent a51b9b9 commit 72d9877

File tree

9 files changed

+28
-9
lines changed

9 files changed

+28
-9
lines changed

apps/supervisor/src/util.ts

+8-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ export function getDockerHostDomain() {
55
return isMacOs || isWindows ? "host.docker.internal" : "localhost";
66
}
77

8-
export function getRunnerId(runId: string) {
9-
return `runner-${runId.replace("run_", "")}`;
8+
export function getRunnerId(runId: string, attemptNumber?: number) {
9+
const parts = ["runner", runId.replace("run_", "")];
10+
11+
if (attemptNumber && attemptNumber > 1) {
12+
parts.push(`attempt-${attemptNumber}`);
13+
}
14+
15+
return parts.join("-");
1016
}

apps/supervisor/src/workloadManager/docker.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export class DockerWorkloadManager implements WorkloadManager {
2222
async create(opts: WorkloadManagerCreateOptions) {
2323
this.logger.log("[DockerWorkloadProvider] Creating container", { opts });
2424

25-
const runnerId = getRunnerId(opts.runFriendlyId);
25+
const runnerId = getRunnerId(opts.runFriendlyId, opts.nextAttemptNumber);
2626

2727
const runArgs = [
2828
"run",

apps/supervisor/src/workloadManager/kubernetes.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
3131
async create(opts: WorkloadManagerCreateOptions) {
3232
this.logger.log("[KubernetesWorkloadManager] Creating container", { opts });
3333

34-
const runnerId = getRunnerId(opts.runFriendlyId);
34+
const runnerId = getRunnerId(opts.runFriendlyId, opts.nextAttemptNumber);
3535

3636
try {
3737
await this.k8s.core.createNamespacedPod({

apps/webapp/app/v3/runEngineHandlers.server.ts

+8-1
Original file line numberDiff line numberDiff line change
@@ -326,12 +326,19 @@ export function registerRunEngineEventBusHandlers() {
326326

327327
engine.eventBus.on("runRetryScheduled", async ({ time, run, environment, retryAt }) => {
328328
try {
329-
await eventRepository.recordEvent(`Retry #${run.attemptNumber} delay`, {
329+
let retryMessage = `Retry #${run.attemptNumber} delay`;
330+
331+
if (run.nextMachineAfterOOM) {
332+
retryMessage += ` after OOM`;
333+
}
334+
335+
await eventRepository.recordEvent(retryMessage, {
330336
taskSlug: run.taskIdentifier,
331337
environment,
332338
attributes: {
333339
properties: {
334340
retryAt: retryAt.toISOString(),
341+
nextMachine: run.nextMachineAfterOOM,
335342
},
336343
runId: run.friendlyId,
337344
style: {

internal-packages/run-engine/src/engine/eventBus.ts

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export type EventBusEvents = {
8585
traceContext: Record<string, string | undefined>;
8686
taskIdentifier: string;
8787
baseCostInCents: number;
88+
nextMachineAfterOOM?: string;
8889
};
8990
organization: {
9091
id: string;

internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts

+1
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ export class RunAttemptSystem {
692692
traceContext: run.traceContext as Record<string, string | undefined>,
693693
baseCostInCents: run.baseCostInCents,
694694
spanId: run.spanId,
695+
nextMachineAfterOOM: retryResult.machine,
695696
},
696697
organization: {
697698
id: run.runtimeEnvironment.organizationId,

packages/cli-v3/src/deploy/buildImage.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,7 @@ ENV TRIGGER_PROJECT_ID=\${TRIGGER_PROJECT_ID} \
688688
TRIGGER_CONTENT_HASH=\${TRIGGER_CONTENT_HASH} \
689689
TRIGGER_PROJECT_REF=\${TRIGGER_PROJECT_REF} \
690690
NODE_EXTRA_CA_CERTS=\${NODE_EXTRA_CA_CERTS} \
691-
NODE_ENV=production \
692-
NODE_OPTIONS="--max_old_space_size=8192"
691+
NODE_ENV=production
693692
694693
# Copy the files from the install stage
695694
COPY --from=build --chown=node:node /app ./

packages/core/src/v3/build/flags.test.ts

+5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ describe("dedupFlags", () => {
1818
expect(dedupFlags("--log=info --log=warn --log=error")).toBe("--log=error");
1919
});
2020

21+
it("should treat underscores as hyphens", () => {
22+
expect(dedupFlags("--debug_level=info")).toBe("--debug-level=info");
23+
expect(dedupFlags("--debug_level=info --debug-level=warn")).toBe("--debug-level=warn");
24+
});
25+
2126
it("should handle mix of flags with and without values", () => {
2227
expect(dedupFlags("--debug=false -v --debug=true")).toBe("-v --debug=true");
2328
expect(dedupFlags("-v --quiet -v")).toBe("--quiet -v");

packages/core/src/v3/build/flags.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ export function dedupFlags(flags: string): string {
2727
.map((flag): [string, string | boolean] => {
2828
const equalIndex = flag.indexOf("=");
2929
if (equalIndex !== -1) {
30-
const key = flag.slice(0, equalIndex);
30+
const key = flag.slice(0, equalIndex).replace(/_/g, "-");
3131
const value = flag.slice(equalIndex + 1);
3232
return [key, value];
3333
} else {
34-
return [flag, true];
34+
return [flag.replace(/_/g, "-"), true];
3535
}
3636
});
3737

0 commit comments

Comments
 (0)