Harden sharded Linux test scheduling

This commit is contained in:
Junebugg1214 2026-03-19 23:31:41 -04:00
parent b17b4f9fd3
commit 221ee04166
2 changed files with 64 additions and 6 deletions

View File

@ -267,6 +267,7 @@ const parseEnvNumber = (name, fallback) => {
return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
};
// Set when running in CI with the suite split across more than one shard.
const shardedCi = isCI && shardCount > 1;
// Sharded CI on a host that is neither Windows nor macOS.
const shardedLinuxCi = shardedCi && !(isWindows || isMacOS);
// Subset of the known test files that isUnitConfigTestFile accepts.
const allKnownUnitFiles = allKnownTestFiles.filter((candidate) => isUnitConfigTestFile(candidate));
@ -312,6 +313,9 @@ const timedHeavyUnitFiles =
// De-duplicated union of the behavior-override, timed-heavy, and channel-singleton files.
const unitFastExcludedFiles = Array.from(
  new Set([...unitBehaviorOverrideSet, ...timedHeavyUnitFiles, ...channelSingletonFiles]),
);
// The shared unit-fast lane on sharded Linux CI still shows the broadest heap retention, so it
// uses process forks there to let workers release memory more aggressively between files.
// In every other environment the vmForks pool is kept whenever useVmForks is set.
const unitFastPool = !useVmForks || shardedLinuxCi ? "forks" : "vmForks";
// Returns the duration recorded for `file` in the timing manifest, or the manifest-wide default
// when the file has no entry or its entry carries no durationMs.
const estimateUnitDurationMs = (file) => {
  const recordedDurationMs = unitTimingManifest.files[file]?.durationMs;
  return recordedDurationMs ?? unitTimingManifest.defaultDurationMs;
};
const heavyUnitBuckets = packFilesByDuration(
@ -333,7 +337,7 @@ const baseRuns = [
"run",
"--config",
"vitest.unit.config.ts",
`--pool=${useVmForks ? "vmForks" : "forks"}`,
`--pool=${unitFastPool}`,
...(disableIsolation ? ["--isolate=false"] : []),
...unitFastExcludedFiles.flatMap((file) => ["--exclude", file]),
],
@ -700,6 +704,9 @@ const maxWorkersForRun = (name) => {
if (resolvedOverride) {
return resolvedOverride;
}
if (shardedLinuxCi && name === "unit-fast") {
return 1;
}
if (name.endsWith("-threads") || name.endsWith("-vmforks")) {
return 1;
}
@ -1191,7 +1198,10 @@ if (passthroughRequiresSingleRun && passthroughOptionArgs.length > 0) {
process.exit(2);
}
if (isMacMiniProfile && targetedEntries.length === 0) {
const shouldFrontloadUnitFast =
targetedEntries.length === 0 && (isMacMiniProfile || shardedLinuxCi);
if (shouldFrontloadUnitFast) {
const unitFastEntry = parallelRuns.find((entry) => entry.name === "unit-fast");
if (unitFastEntry) {
const unitFastCode = await run(unitFastEntry, passthroughOptionArgs);
@ -1200,13 +1210,13 @@ if (isMacMiniProfile && targetedEntries.length === 0) {
}
}
const deferredEntries = parallelRuns.filter((entry) => entry.name !== "unit-fast");
const failedMacMiniParallel = await runEntriesWithLimit(
const failedDeferredParallel = await runEntriesWithLimit(
deferredEntries,
passthroughOptionArgs,
3,
isMacMiniProfile ? 3 : topLevelParallelLimit,
);
if (failedMacMiniParallel !== undefined) {
process.exit(failedMacMiniParallel);
if (failedDeferredParallel !== undefined) {
process.exit(failedDeferredParallel);
}
} else {
const failedParallel = await runEntries(parallelRuns, passthroughOptionArgs);

View File

@ -278,6 +278,54 @@
{
"file": "src/tui/tui.submit-handler.test.ts",
"reason": "TUI submit handler coverage retained the largest shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 OOM lane."
},
{
"file": "src/cron/isolated-agent.uses-last-non-empty-agent-text-as.test.ts",
"reason": "Isolated-agent last-non-empty text coverage retained the largest shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/infra/restart.test.ts",
"reason": "Restart coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/infra/update-runner.test.ts",
"reason": "Update runner coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/plugins/commands.test.ts",
"reason": "Plugin commands coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/infra/outbound/message-action-runner.context.test.ts",
"reason": "Message action runner context coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/cron/service.main-job-passes-heartbeat-target-last.test.ts",
"reason": "Cron main-job heartbeat target coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 1 rerun OOM lane."
},
{
"file": "src/config/schema.help.quality.test.ts",
"reason": "Schema help quality coverage retained the largest shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
},
{
"file": "src/infra/run-node.test.ts",
"reason": "Run-node coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
},
{
"file": "src/media-understanding/providers/openai/audio.test.ts",
"reason": "OpenAI audio provider coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
},
{
"file": "src/media/fetch.telegram-network.test.ts",
"reason": "Telegram network fetch coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
},
{
"file": "src/channels/plugins/contracts/group-policy.contract.test.ts",
"reason": "Group policy contract coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
},
{
"file": "src/config/sessions/store.pruning.test.ts",
"reason": "Session store pruning coverage retained a top shared unit-fast heap spike in the March 20, 2026 Linux Node 24 shard 2 rerun OOM lane."
}
],
"threadSingleton": [