fix: drain inbound debounce buffer and followup queues before SIGUSR1 reload
When config.patch triggers a SIGUSR1 restart, two in-memory message
buffers were silently wiped:
1. Per-channel inbound debounce buffers (closure-local Map + setTimeout)
2. Followup queues (global Map of pending session messages)
This caused inbound messages received during the debounce window to be
permanently lost on config-triggered gateway restarts.
Fix:
- Add a global registry of inbound debouncers so they can be flushed
collectively during restart. Each createInboundDebouncer() call now
auto-registers in a shared Symbol.for() map, with a new flushAll()
method that immediately processes all buffered items.
- Add flushAllInboundDebouncers() which iterates the global registry
and forces all debounce timers to fire immediately.
- Add waitForFollowupQueueDrain() which polls the FOLLOWUP_QUEUES map
until all queues finish processing (or timeout).
- Hook both into the SIGUSR1 restart flow in run-loop.ts: before
markGatewayDraining(), flush all debouncers first (pushing buffered
messages into the followup queues), then wait up to 5s for the
followup drain loops to process them.
The ordering is critical: flush debouncers → wait for followup drain →
then mark draining. This ensures messages that were mid-debounce get
delivered to sessions before the gateway reinitializes.
Tests:
- flushAllInboundDebouncers: flushes multiple registered debouncers,
returns count, deregisters after flush
- createInboundDebouncer.flushAll: flushes all keys in a single debouncer
- waitForFollowupQueueDrain: immediate return when empty, waits for
drain, returns not-drained on timeout, counts draining queues
- run-loop: SIGUSR1 calls flush before markGatewayDraining, skips
followup wait when no debouncers had buffered messages, logs warning
on followup drain timeout
2026-03-14 11:54:01 -04:00
|
|
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
|
|
|
import { waitForFollowupQueueDrain } from "./drain-all.js";
|
|
|
|
|
import { FOLLOWUP_QUEUES, type FollowupQueueState } from "./state.js";
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a followup queue state for use in tests.
 *
 * The baseline is an idle queue: no items, not draining, nothing dropped.
 * Any field supplied in `overrides` replaces the corresponding baseline
 * value. The baseline is rebuilt on every call, so the `items` and
 * `summaryLines` arrays are never shared between two mock queues.
 *
 * @param overrides - Fields to replace on the baseline queue state.
 * @returns The baseline state with `overrides` applied on top.
 */
function createMockQueue(overrides: Partial<FollowupQueueState> = {}): FollowupQueueState {
  const baseline: FollowupQueueState = {
    items: [],
    draining: false,
    lastEnqueuedAt: 0,
    mode: "followup",
    debounceMs: 1000,
    cap: 20,
    dropPolicy: "summarize",
    droppedCount: 0,
    summaryLines: [],
  };

  return { ...baseline, ...overrides };
}
|
|
|
|
|
|
|
|
|
|
// Tests register queues in the shared FOLLOWUP_QUEUES map; empty it after
// each test so entries never carry over into the next one.
afterEach(function resetFollowupQueues(): void {
  FOLLOWUP_QUEUES.clear();
});
|
|
|
|
|
|
|
|
|
|
describe("waitForFollowupQueueDrain", () => {
|
|
|
|
|
it("returns drained immediately when no queues exist", async () => {
  // Nothing has been registered in FOLLOWUP_QUEUES for this test.
  await expect(waitForFollowupQueueDrain(1000)).resolves.toEqual({
    drained: true,
    remaining: 0,
  });
});
|
|
|
|
|
|
|
|
|
|
it("returns drained immediately when all queues are empty", async () => {
  // A registered queue with no items that is not draining must not hold up the wait.
  const idleQueue = createMockQueue();
  FOLLOWUP_QUEUES.set("test", idleQueue);

  const outcome = await waitForFollowupQueueDrain(1000);

  expect(outcome).toEqual({ drained: true, remaining: 0 });
});
|
|
|
|
|
|
|
|
|
|
it("waits until queues are drained", async () => {
  // Start with one pending item on a queue that is actively draining.
  const queue = createMockQueue({
    items: [
      { prompt: "test", run: vi.fn() as unknown, enqueuedAt: Date.now() },
    ] as FollowupQueueState["items"],
    draining: true,
  });
  FOLLOWUP_QUEUES.set("test", queue);

  // Simulate drain completing after 100ms
  const drainTimer = setTimeout(() => {
    queue.items.length = 0;
    queue.draining = false;
    FOLLOWUP_QUEUES.delete("test");
  }, 100);

  try {
    // The 5s budget is far longer than the simulated drain, so the wait
    // should resolve as drained rather than time out.
    const result = await waitForFollowupQueueDrain(5000);
    expect(result.drained).toBe(true);
    expect(result.remaining).toBe(0);
  } finally {
    // If the wait rejects or an assertion fails before the timer fires, the
    // pending callback would later delete the "test" key that following
    // tests also register. Cancel it; this is a no-op once it has fired.
    clearTimeout(drainTimer);
  }
});
|
|
|
|
|
|
|
|
|
|
it("returns not drained on timeout", async () => {
  const pendingItems = [
    { prompt: "test", run: vi.fn() as unknown, enqueuedAt: Date.now() },
  ] as FollowupQueueState["items"];
  FOLLOWUP_QUEUES.set("test", createMockQueue({ items: pendingItems, draining: true }));

  // Nothing in this test ever empties the queue, so the 100ms budget must run out.
  const { drained, remaining } = await waitForFollowupQueueDrain(100);

  expect(drained).toBe(false);
  expect(remaining).toBeGreaterThan(0);
});
|
|
|
|
|
|
|
|
|
|
it("counts draining queues as having pending items even with empty items array", async () => {
  // Queue has no items but is still draining — should wait
  FOLLOWUP_QUEUES.set("test", createMockQueue({ draining: true }));

  const { drained, remaining } = await waitForFollowupQueueDrain(100);

  expect(drained).toBe(false);
  expect(remaining).toBeGreaterThanOrEqual(1);
});
|
2026-03-14 15:16:48 -04:00
|
|
|
|
|
|
|
|
it("reports each draining queue in the timeout remaining count", async () => {
  // Three queues that stay in the draining state for the whole (1ms) wait.
  for (const key of ["queue-1", "queue-2", "queue-3"]) {
    FOLLOWUP_QUEUES.set(key, createMockQueue({ draining: true }));
  }

  await expect(waitForFollowupQueueDrain(1)).resolves.toEqual({
    drained: false,
    remaining: 3,
  });
});
|
fix: drain inbound debounce buffer and followup queues before SIGUSR1 reload
When config.patch triggers a SIGUSR1 restart, two in-memory message
buffers were silently wiped:
1. Per-channel inbound debounce buffers (closure-local Map + setTimeout)
2. Followup queues (global Map of pending session messages)
This caused inbound messages received during the debounce window to be
permanently lost on config-triggered gateway restarts.
Fix:
- Add a global registry of inbound debouncers so they can be flushed
collectively during restart. Each createInboundDebouncer() call now
auto-registers in a shared Symbol.for() map, with a new flushAll()
method that immediately processes all buffered items.
- Add flushAllInboundDebouncers() which iterates the global registry
and forces all debounce timers to fire immediately.
- Add waitForFollowupQueueDrain() which polls the FOLLOWUP_QUEUES map
until all queues finish processing (or timeout).
- Hook both into the SIGUSR1 restart flow in run-loop.ts: before
markGatewayDraining(), flush all debouncers first (pushing buffered
messages into the followup queues), then wait up to 5s for the
followup drain loops to process them.
The ordering is critical: flush debouncers → wait for followup drain →
then mark draining. This ensures messages that were mid-debounce get
delivered to sessions before the gateway reinitializes.
Tests:
- flushAllInboundDebouncers: flushes multiple registered debouncers,
returns count, deregisters after flush
- createInboundDebouncer.flushAll: flushes all keys in a single debouncer
- waitForFollowupQueueDrain: immediate return when empty, waits for
drain, returns not-drained on timeout, counts draining queues
- run-loop: SIGUSR1 calls flush before markGatewayDraining, skips
followup wait when no debouncers had buffered messages, logs warning
on followup drain timeout
2026-03-14 11:54:01 -04:00
|
|
|
});
|