openclaw/test/evals/dataset.ts

141 lines
4.1 KiB
TypeScript

import type { EvalCase } from "./types.js";
export const chatAgentEvalDataset: EvalCase[] = [
{
id: "tool-call-success",
description: "single-tool answer includes expected retrieval summary",
critical: true,
events: [
{ type: "user-message", id: "u-1", text: "Find docs for workspace lock", globalSeq: 1 },
{ type: "tool-input-start", toolCallId: "tool-1", toolName: "searchDocs", globalSeq: 2 },
{
type: "tool-input-available",
toolCallId: "tool-1",
globalSeq: 3,
output: { query: "workspace lock" },
},
{
type: "tool-output-available",
toolCallId: "tool-1",
globalSeq: 4,
output: { hits: 3 },
},
{ type: "text-start", globalSeq: 5 },
{
type: "text-delta",
delta: "Found 3 matching documents about workspace lock.",
globalSeq: 6,
},
{ type: "text-end", globalSeq: 7 },
],
expectations: {
output: {
mustContain: ["Found 3 matching documents"],
mustNotContain: ["NO_REPLY"],
},
trajectory: {
tools: ["searchDocs"],
mode: "strict",
},
trace: {
requiredTypes: ["tool-input-start", "tool-output-available", "text-delta"],
requireMonotonicGlobalSeq: true,
},
},
},
{
id: "subagent-roundtrip",
description:
"subagent spawn followed by completion announcement is preserved in final response",
critical: true,
events: [
{ type: "user-message", id: "u-2", text: "Research customer timeline", globalSeq: 1 },
{ type: "tool-input-start", toolCallId: "tool-2", toolName: "spawnSubagent", globalSeq: 2 },
{
type: "tool-output-available",
toolCallId: "tool-2",
globalSeq: 3,
output: { childSessionKey: "sub:abc", task: "timeline research" },
},
{
type: "tool-input-start",
toolCallId: "tool-3",
toolName: "collectSubagentResult",
globalSeq: 4,
},
{
type: "tool-output-available",
toolCallId: "tool-3",
globalSeq: 5,
output: { summary: "Key milestones collected." },
},
{ type: "text-start", globalSeq: 6 },
{
type: "text-delta",
delta: "Subagent finished and reported back with key milestones.",
globalSeq: 7,
},
{ type: "text-end", globalSeq: 8 },
],
expectations: {
output: {
mustContain: ["Subagent finished and reported back"],
},
trajectory: {
tools: ["spawnSubagent", "collectSubagentResult"],
mode: "strict",
},
trace: {
requiredTypes: ["tool-input-start", "tool-output-available", "text-delta"],
requireMonotonicGlobalSeq: true,
},
},
},
{
id: "crm-view-mutation",
description:
"CRM mutation path applies object view metadata and confirms result in assistant output",
critical: false,
events: [
{ type: "user-message", id: "u-3", text: "Set Important as active leads view", globalSeq: 1 },
{ type: "tool-input-start", toolCallId: "tool-4", toolName: "saveObjectViews", globalSeq: 2 },
{
type: "tool-output-available",
toolCallId: "tool-4",
globalSeq: 3,
output: { object: "leads", activeView: "Important" },
},
{
type: "tool-input-start",
toolCallId: "tool-5",
toolName: "fetchObjectEntries",
globalSeq: 4,
},
{
type: "tool-output-available",
toolCallId: "tool-5",
globalSeq: 5,
output: { filteredRows: 12 },
},
{
type: "text-delta",
delta: "Active view 'Important' is now applied to leads.",
globalSeq: 6,
},
],
expectations: {
output: {
mustContain: ["Active view 'Important' is now applied"],
},
trajectory: {
tools: ["saveObjectViews", "fetchObjectEntries"],
mode: "strict",
},
trace: {
requiredTypes: ["tool-input-start", "tool-output-available", "text-delta"],
requireMonotonicGlobalSeq: true,
},
},
},
];