Add deterministic context control layer that intercepts prompt construction without modifying existing architecture: - context_engine.py: single choke point (build_context) that assembles structured prompts from ledger + sigil + live window, with token budget enforcement and automatic window shrinking - ledger.py: bounded per-stream JSON state (orientation, blockers, open questions, delta) with hard field/list limits - sigil.py: FIFO shorthand memory (max 15 entries) with deterministic rule-based generation from message patterns - token_gate.py: fast token estimation (~4 chars/token) and hard cap enforcement with configurable MAX_TOKENS/LIVE_WINDOW - redact.py: secret pattern detection (Discord, OpenAI, Anthropic, AWS, Slack, GitHub, Telegram, Bearer, generic key=value) replaced with [REDACTED_SECRET] before any output path All 64 tests passing. No modifications to existing agent spawning, model routing, tool system, or Discord relay architecture. https://claude.ai/code/session_01K7BWJY2gUoJi6dq91Yc7nx
203 lines
6.3 KiB
Python
203 lines
6.3 KiB
Python
"""
|
|
ra2.context_engine — The single choke point for all model calls.
|
|
|
|
All prompts must pass through build_context() before reaching any provider.
|
|
|
|
Internal flow:
|
|
1. Load ledger state for stream
|
|
2. Load sigil state
|
|
3. Load last N live messages (default LIVE_WINDOW)
|
|
4. Run rule-based compression pass
|
|
5. Assemble structured prompt
|
|
6. Estimate token count
|
|
7. If > MAX_TOKENS: shrink live window, reassemble
|
|
8. If still > MAX_TOKENS: raise controlled exception
|
|
|
|
Never reads full .md history.
|
|
"""
|
|
|
|
import re
|
|
from typing import List, Optional
|
|
|
|
from ra2 import ledger, sigil, token_gate, redact
|
|
|
|
# ── Compression rule patterns ───────────────────────────────────────
|
|
|
|
_DECISION_RE = re.compile(
|
|
r"(?:we\s+will|we\s+chose|decided\s+to|going\s+to|let'?s)\s+(.{10,120})",
|
|
re.IGNORECASE,
|
|
)
|
|
_ARCHITECTURE_RE = re.compile(
|
|
r"(?:architect(?:ure)?|refactor|redesign|restructur|migrat)\w*\s+(.{10,120})",
|
|
re.IGNORECASE,
|
|
)
|
|
_COST_RE = re.compile(
|
|
r"(?:budget|cost|spend|rate[_\s]*limit|token[_\s]*cap|pricing)\s*[:=→]?\s*(.{5,120})",
|
|
re.IGNORECASE,
|
|
)
|
|
_BLOCKER_RE = re.compile(
|
|
r"(?:block(?:er|ed|ing)|stuck|cannot|can'?t\s+proceed|waiting\s+on)\s+(.{5,120})",
|
|
re.IGNORECASE,
|
|
)
|
|
_QUESTION_RE = re.compile(
|
|
r"(?:should\s+we|do\s+we|how\s+(?:do|should)|what\s+(?:if|about)|need\s+to\s+decide)\s+(.{5,120})",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
|
|
def _extract_content(msg: dict) -> str:
|
|
"""Get text content from a message dict."""
|
|
content = msg.get("content", "")
|
|
if isinstance(content, str):
|
|
return content
|
|
if isinstance(content, list):
|
|
# Handle structured content blocks
|
|
parts = []
|
|
for block in content:
|
|
if isinstance(block, dict) and block.get("type") == "text":
|
|
parts.append(block.get("text", ""))
|
|
elif isinstance(block, str):
|
|
parts.append(block)
|
|
return " ".join(parts)
|
|
return str(content)
|
|
|
|
|
|
def _run_compression(messages: list, stream_id: str) -> None:
|
|
"""Rule-based compression pass over recent messages.
|
|
|
|
Extracts decisions, architecture shifts, cost constraints, blockers,
|
|
and open questions — then updates the ledger accordingly.
|
|
"""
|
|
decisions: list[str] = []
|
|
blockers: list[str] = []
|
|
open_questions: list[str] = []
|
|
latest_summary_parts: list[str] = []
|
|
|
|
for msg in messages:
|
|
text = _extract_content(msg)
|
|
if not text:
|
|
continue
|
|
|
|
# Decisions
|
|
for m in _DECISION_RE.finditer(text):
|
|
decisions.append(m.group(1).strip())
|
|
|
|
# Architecture shifts
|
|
for m in _ARCHITECTURE_RE.finditer(text):
|
|
latest_summary_parts.append(f"arch: {m.group(1).strip()}")
|
|
|
|
# Cost/budget
|
|
for m in _COST_RE.finditer(text):
|
|
latest_summary_parts.append(f"cost: {m.group(1).strip()}")
|
|
|
|
# Blockers
|
|
for m in _BLOCKER_RE.finditer(text):
|
|
blockers.append(m.group(1).strip())
|
|
|
|
# Open questions
|
|
for m in _QUESTION_RE.finditer(text):
|
|
open_questions.append(m.group(1).strip())
|
|
|
|
# Sigil generation
|
|
sigil_body = sigil.generate_from_message(text)
|
|
if sigil_body:
|
|
sigil.append(stream_id, sigil_body)
|
|
|
|
# Build delta from decisions
|
|
delta = "; ".join(decisions[-5:]) if decisions else ""
|
|
latest = "; ".join(latest_summary_parts[-5:]) if latest_summary_parts else ""
|
|
|
|
# Update ledger (only non-empty fields)
|
|
updates = {}
|
|
if delta:
|
|
updates["delta"] = delta
|
|
if latest:
|
|
updates["latest"] = latest
|
|
if blockers:
|
|
updates["blockers"] = blockers[-token_gate.MAX_TOKENS:] # bounded
|
|
if open_questions:
|
|
updates["open"] = open_questions[-10:]
|
|
|
|
if updates:
|
|
ledger.update(stream_id, **updates)
|
|
|
|
|
|
def _assemble_prompt(stream_id: str, live_messages: list) -> str:
|
|
"""Build the structured prompt from ledger + sigil + live window."""
|
|
sections = []
|
|
|
|
# Ledger section
|
|
ledger_snap = ledger.snapshot(stream_id)
|
|
sections.append(f"=== LEDGER ===\n{ledger_snap}")
|
|
|
|
# Sigil section
|
|
sigil_snap = sigil.snapshot(stream_id)
|
|
sections.append(f"=== SIGIL ===\n{sigil_snap}")
|
|
|
|
# Live window section
|
|
live_lines = []
|
|
for msg in live_messages:
|
|
role = msg.get("role", "unknown")
|
|
content = _extract_content(msg)
|
|
live_lines.append(f"[{role}] {content}")
|
|
sections.append("=== LIVE WINDOW ===\n" + "\n".join(live_lines))
|
|
|
|
# Closing directive
|
|
sections.append("Respond concisely and aligned with orientation.")
|
|
|
|
return "\n\n".join(sections)
|
|
|
|
|
|
def build_context(stream_id: str, new_messages: list) -> dict:
|
|
"""Main entry point — the single choke point for all model calls.
|
|
|
|
Args:
|
|
stream_id: Unique identifier for the conversation stream.
|
|
new_messages: List of message dicts with at minimum 'role' and 'content'.
|
|
|
|
Returns:
|
|
{
|
|
"prompt": str, # The assembled, redacted prompt
|
|
"token_estimate": int # Estimated token count
|
|
}
|
|
|
|
Raises:
|
|
token_gate.TokenBudgetExceeded: If prompt exceeds MAX_TOKENS
|
|
even after shrinking the live window to minimum.
|
|
"""
|
|
# 1. Run compression pass on new messages → updates ledger + sigils
|
|
_run_compression(new_messages, stream_id)
|
|
|
|
# 2. Determine live window
|
|
window_size = token_gate.LIVE_WINDOW
|
|
live_messages = new_messages[-window_size:]
|
|
|
|
# 3. Assemble prompt
|
|
prompt = _assemble_prompt(stream_id, live_messages)
|
|
|
|
# 4. Redact secrets
|
|
prompt = redact.redact(prompt)
|
|
|
|
# 5. Estimate tokens
|
|
estimated = token_gate.estimate_tokens(prompt)
|
|
|
|
# 6. Shrink loop if over budget
|
|
while not token_gate.check_budget(estimated):
|
|
try:
|
|
window_size = token_gate.shrink_window(window_size)
|
|
except token_gate.TokenBudgetExceeded:
|
|
# Already at minimum window — hard fail
|
|
raise token_gate.TokenBudgetExceeded(
|
|
estimated=estimated,
|
|
limit=token_gate.MAX_TOKENS,
|
|
)
|
|
live_messages = new_messages[-window_size:]
|
|
prompt = _assemble_prompt(stream_id, live_messages)
|
|
prompt = redact.redact(prompt)
|
|
estimated = token_gate.estimate_tokens(prompt)
|
|
|
|
return {
|
|
"prompt": prompt,
|
|
"token_estimate": estimated,
|
|
}
|