""" ra2.context_engine — The single choke point for all model calls. All prompts must pass through build_context() before reaching any provider. Internal flow: 1. Redact secrets from incoming messages 2. Run rule-based compression pass (writes redacted data to ledger/sigils) 3. Determine live window from redacted messages 4. Assemble structured prompt 5. Estimate token count 6. If > MAX_TOKENS: shrink live window, reassemble 7. If still > MAX_TOKENS: raise controlled exception Never reads full .md history. """ import re from typing import List, Optional from ra2 import ledger, sigil, token_gate, redact # ── Compression rule patterns ─────────────────────────────────────── _DECISION_RE = re.compile( r"(?:we\s+will|we\s+chose|decided\s+to|going\s+to|let'?s)\s+(.{10,120})", re.IGNORECASE, ) _ARCHITECTURE_RE = re.compile( r"(?:architect(?:ure)?|refactor|redesign|restructur|migrat)\w*\s+(.{10,120})", re.IGNORECASE, ) _COST_RE = re.compile( r"(?:budget|cost|spend|rate[_\s]*limit|token[_\s]*cap|pricing)\s*[:=→]?\s*(.{5,120})", re.IGNORECASE, ) _BLOCKER_RE = re.compile( r"(?:block(?:er|ed|ing)|stuck|cannot|can'?t\s+proceed|waiting\s+on)\s+(.{5,120})", re.IGNORECASE, ) _QUESTION_RE = re.compile( r"(?:should\s+we|do\s+we|how\s+(?:do|should)|what\s+(?:if|about)|need\s+to\s+decide)\s+(.{5,120})", re.IGNORECASE, ) def _extract_content(msg: dict) -> str: """Get text content from a message dict.""" content = msg.get("content", "") if isinstance(content, str): return content if isinstance(content, list): # Handle structured content blocks parts = [] for block in content: if isinstance(block, dict) and block.get("type") == "text": parts.append(block.get("text", "")) elif isinstance(block, str): parts.append(block) return " ".join(parts) return str(content) def _run_compression(messages: list, stream_id: str) -> None: """Rule-based compression pass over recent messages. Extracts decisions, architecture shifts, cost constraints, blockers, and open questions — then updates the ledger accordingly. """ decisions: list[str] = [] blockers: list[str] = [] open_questions: list[str] = [] latest_summary_parts: list[str] = [] for msg in messages: text = _extract_content(msg) if not text: continue # Decisions for m in _DECISION_RE.finditer(text): decisions.append(m.group(1).strip()) # Architecture shifts for m in _ARCHITECTURE_RE.finditer(text): latest_summary_parts.append(f"arch: {m.group(1).strip()}") # Cost/budget for m in _COST_RE.finditer(text): latest_summary_parts.append(f"cost: {m.group(1).strip()}") # Blockers for m in _BLOCKER_RE.finditer(text): blockers.append(m.group(1).strip()) # Open questions for m in _QUESTION_RE.finditer(text): open_questions.append(m.group(1).strip()) # Sigil event generation sigil_triple = sigil.generate_from_message(text) if sigil_triple: op, constraint, decision = sigil_triple sigil.append_event(stream_id, op, constraint, decision) # Build delta from decisions delta = "; ".join(decisions[-5:]) if decisions else "" latest = "; ".join(latest_summary_parts[-5:]) if latest_summary_parts else "" # Update ledger (only non-empty fields) updates = {} if delta: updates["delta"] = delta if latest: updates["latest"] = latest if blockers: updates["blockers"] = blockers[-ledger.MAX_BLOCKERS:] # bounded if open_questions: updates["open"] = open_questions[-10:] if updates: ledger.update(stream_id, **updates) def _assemble_prompt(stream_id: str, live_messages: list) -> str: """Build the structured prompt from ledger + (optional sigil) + live window.""" sections = [] # Sigil section — only when DEBUG_SIGIL is enabled if sigil.DEBUG_SIGIL: sigil_snap = sigil.snapshot(stream_id) if sigil_snap != "(no sigils)": sections.append( f"=== INTERNAL SIGIL SNAPSHOT ===\n{sigil_snap}" ) # Ledger section ledger_snap = ledger.snapshot(stream_id) sections.append(f"=== LEDGER ===\n{ledger_snap}") # Live window section live_lines = [] for msg in live_messages: role = msg.get("role", "unknown") content = _extract_content(msg) live_lines.append(f"[{role}] {content}") sections.append("=== LIVE WINDOW ===\n" + "\n".join(live_lines)) # Closing directive sections.append("Respond concisely and aligned with orientation.") return "\n\n".join(sections) def build_context(stream_id: str, new_messages: list) -> dict: """Main entry point — the single choke point for all model calls. Args: stream_id: Unique identifier for the conversation stream. new_messages: List of message dicts with at minimum 'role' and 'content'. Returns: { "prompt": str, # The assembled, redacted prompt "token_estimate": int # Estimated token count } Raises: token_gate.TokenBudgetExceeded: If prompt exceeds MAX_TOKENS even after shrinking the live window to minimum. """ # 1. Redact secrets before any disk-persisting step (ledger/sigil writes) safe_messages = redact.redact_messages(new_messages) # 2. Run compression pass on redacted messages → updates ledger + sigils _run_compression(safe_messages, stream_id) # 3. Determine live window (from already-redacted messages) window_size = token_gate.LIVE_WINDOW live_messages = safe_messages[-window_size:] # 4. Assemble prompt prompt = _assemble_prompt(stream_id, live_messages) # 5. Estimate tokens estimated = token_gate.estimate_tokens(prompt) # 6. Shrink loop if over budget while not token_gate.check_budget(estimated): try: window_size = token_gate.shrink_window(window_size) except token_gate.TokenBudgetExceeded: # Already at minimum window — hard fail raise token_gate.TokenBudgetExceeded( estimated=estimated, limit=token_gate.MAX_TOKENS, ) live_messages = safe_messages[-window_size:] prompt = _assemble_prompt(stream_id, live_messages) estimated = token_gate.estimate_tokens(prompt) return { "prompt": prompt, "token_estimate": estimated, }