Merge 110bfd8bb7d4063f994441df2a2e454f45cbac73 into 6b4c24c2e55b5b4013277bd799525086f6a0c40f

This commit is contained in:
Saurabh Mishra 2026-03-21 04:45:10 +00:00 committed by GitHub
commit fc6157fafa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 69 additions and 6 deletions

View File

@ -112,7 +112,14 @@ function capFrozenResultText(resultText: string): string {
0,
FROZEN_RESULT_TEXT_MAX_BYTES - Buffer.byteLength(notice, "utf8"),
);
const payload = Buffer.from(trimmed, "utf8").subarray(0, maxPayloadBytes).toString("utf8");
const buf = Buffer.from(trimmed, "utf8");
let end = maxPayloadBytes;
// Walk back if we landed in the middle of a multi-byte UTF-8 sequence
// (continuation bytes have the bit pattern 10xxxxxx, i.e. 0x80..0xBF).
while (end > 0 && buf[end] !== undefined && (buf[end] & 0xc0) === 0x80) {
end--;
}
const payload = buf.subarray(0, end).toString("utf8");
return `${payload}${notice}`;
}

View File

@ -35,6 +35,47 @@ describe.runIf(process.platform !== "win32")("requestJsonlSocket", () => {
});
});
it("handles multi-byte UTF-8 characters split across TCP chunks", async () => {
  await withTempDir({ prefix: "openclaw-jsonl-socket-" }, async (dir) => {
    const socketPath = path.join(dir, "socket.sock");

    // One JSONL response line. "你好" encodes to six UTF-8 bytes
    // (e4 bd a0 | e5 a5 bd); every other byte in the line is ASCII.
    const wireBytes = Buffer.from('{"text":"你好"}\n', "utf8");

    // 0xa5 first appears as the second byte of 好, so cutting there leaves
    // the lead byte (e5) at the end of the first piece and the two
    // continuation bytes (a5 bd) at the start of the second piece.
    const cutAt = wireBytes.indexOf(0xa5);
    const head = wireBytes.subarray(0, cutAt);
    const tail = wireBytes.subarray(cutAt);

    // On any incoming request data, reply with the head immediately and the
    // tail shortly afterwards, so the reply is written as two separate
    // pieces with the split landing inside a multi-byte sequence.
    const server = net.createServer((conn) => {
      conn.on("data", () => {
        conn.write(head);
        setTimeout(() => {
          conn.write(tail);
        }, 10);
      });
    });

    await new Promise<void>((listening) => {
      server.listen(socketPath, listening);
    });

    try {
      const result = await requestJsonlSocket({
        socketPath,
        payload: "{}",
        timeoutMs: 2000,
        accept: (msg) => (msg as { text?: string }).text ?? null,
      });

      // The text must round-trip intact ...
      expect(result).toBe("你好");
      // ... and must not contain the U+FFFD replacement character.
      expect(result).not.toContain("\uFFFD");
    } finally {
      server.close();
    }
  });
});
it("returns null on timeout and on socket errors", async () => {
await withTempDir({ prefix: "openclaw-jsonl-socket-" }, async (dir) => {
const socketPath = path.join(dir, "socket.sock");

View File

@ -1,4 +1,5 @@
import net from "node:net";
import { StringDecoder } from "node:string_decoder";
export async function requestJsonlSocket<T>(params: {
socketPath: string;
@ -11,6 +12,10 @@ export async function requestJsonlSocket<T>(params: {
const client = new net.Socket();
let settled = false;
let buffer = "";
// StringDecoder buffers incomplete multi-byte UTF-8 sequences across
// TCP chunks, preventing U+FFFD replacement when a chunk boundary
// falls in the middle of a multi-byte character.
const decoder = new StringDecoder("utf8");
const finish = (value: T | null) => {
if (settled) {
@ -32,7 +37,7 @@ export async function requestJsonlSocket<T>(params: {
client.write(`${payload}\n`);
});
client.on("data", (data) => {
buffer += data.toString("utf8");
buffer += decoder.write(data);
let idx = buffer.indexOf("\n");
while (idx !== -1) {
const line = buffer.slice(0, idx).trim();

View File

@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import { StringDecoder } from "node:string_decoder";
import {
materializeWindowsSpawnProgram,
resolveWindowsSpawnProgram,
@ -55,16 +56,20 @@ export async function runCliCommand(params: {
reject(new Error(`${params.commandSummary} timed out after ${params.timeoutMs}ms`));
}, params.timeoutMs)
: null;
// StringDecoder buffers incomplete multi-byte UTF-8 sequences across
// pipe chunks, preventing U+FFFD replacement at chunk boundaries.
const stdoutDecoder = new StringDecoder("utf8");
const stderrDecoder = new StringDecoder("utf8");
child.stdout.on("data", (data) => {
if (discardStdout) {
return;
}
const next = appendOutputWithCap(stdout, data.toString("utf8"), params.maxOutputChars);
const next = appendOutputWithCap(stdout, stdoutDecoder.write(data), params.maxOutputChars);
stdout = next.text;
stdoutTruncated = stdoutTruncated || next.truncated;
});
child.stderr.on("data", (data) => {
const next = appendOutputWithCap(stderr, data.toString("utf8"), params.maxOutputChars);
const next = appendOutputWithCap(stderr, stderrDecoder.write(data), params.maxOutputChars);
stderr = next.text;
stderrTruncated = stderrTruncated || next.truncated;
});

View File

@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import { StringDecoder } from "node:string_decoder";
import type { Component, SelectItem } from "@mariozechner/pi-tui";
import { createSearchableSelectList } from "./components/selectors.js";
@ -116,11 +117,15 @@ export function createLocalShellRunner(deps: LocalShellDeps) {
let stdout = "";
let stderr = "";
// StringDecoder buffers incomplete multi-byte UTF-8 sequences across
// pipe chunks, preventing U+FFFD replacement at chunk boundaries.
const stdoutDecoder = new StringDecoder("utf8");
const stderrDecoder = new StringDecoder("utf8");
child.stdout.on("data", (buf) => {
stdout = appendWithCap(stdout, buf.toString("utf8"));
stdout = appendWithCap(stdout, stdoutDecoder.write(buf));
});
child.stderr.on("data", (buf) => {
stderr = appendWithCap(stderr, buf.toString("utf8"));
stderr = appendWithCap(stderr, stderrDecoder.write(buf));
});
child.on("close", (code, signal) => {