From 6d9a16e513898026e1ba3d7d47299e1011addc73 Mon Sep 17 00:00:00 2001 From: roberto Date: Thu, 5 Mar 2026 00:06:11 +0100 Subject: [PATCH] steps B.3/B.4/B.5 complete: bidirectional WS handler, _tool_loop verified, clean final frame --- AI_REFACTOR_PLAN.md | 6 +++--- app/core/orchestrator.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/AI_REFACTOR_PLAN.md b/AI_REFACTOR_PLAN.md index db662bd..5c9d2e3 100644 --- a/AI_REFACTOR_PLAN.md +++ b/AI_REFACTOR_PLAN.md @@ -158,7 +158,7 @@ Tools must use **camelCase** field names (Drizzle maps them to snake_case intern - **Outcome:** All 23 tools query real user data via WS. LLM sees actual rows, not action descriptors. ### Step B.3 — Bidirectional WebSocket handler -- [ ] Refactor `app/api/routes/chat.py` WS endpoint: +- [x] Refactor `app/api/routes/chat.py` WS endpoint: - After auth + accept + receive `chat_request`: 1. Create `execute_on_client` callback closure capturing the websocket: ```python @@ -192,13 +192,13 @@ Tools must use **camelCase** field names (Drizzle maps them to snake_case intern - **Outcome:** Full bidirectional WS. Tool calls and text streaming happen concurrently on the same connection. ### Step B.4 — `_tool_loop` — no changes needed -- [ ] Verify `app/core/agent_registry.py` works unchanged: +- [x] Verify `app/core/agent_registry.py` works unchanged: - `_tool_loop` calls `tool_fn.ainvoke(args)` → tool awaits `execute_on_client()` (WS round-trip) → returns string → `ToolMessage(content=string)` → LLM sees real data - The async WS round-trip happens inside each tool. `_tool_loop` just sees an awaited tool returning a string — same as before, different content. - **No code changes.** Just verify + add a log line for tool execution times if desired. ### Step B.5 — Orchestrator cleanup -- [ ] Update `app/core/orchestrator.py`: +- [x] Update `app/core/orchestrator.py`: - `orchestrate_stream()`: remove `"actions": []` from final frame. Final becomes: `{"done": true, "response": "..."}` - No other changes — `classify_intent` → `call_agent` → chunk response → final frame - **Files:** `app/core/orchestrator.py` diff --git a/app/core/orchestrator.py b/app/core/orchestrator.py index 4b5afac..982ef30 100644 --- a/app/core/orchestrator.py +++ b/app/core/orchestrator.py @@ -144,14 +144,15 @@ async def orchestrate_stream( request: ChatRequest, reg: AgentRegistry | None = None, ) -> AsyncGenerator[str, None]: - """Streaming orchestration — yields text chunks then a final JSON frame. + """Streaming orchestration — yields plain text chunks only. - The final frame is a JSON object: - ``{"done": true, "response": "...", "actions": []}``. + The WebSocket handler in ``app/api/routes/chat.py`` is responsible for + wrapping each chunk in a ``text_chunk`` frame and sending the final + ``final`` frame once the generator is exhausted. Agents do not yet support token-level streaming; the full response is - fetched first, then emitted in fixed-size chunks. Token-level streaming - will be wired in Step 6 when agents expose ``astream()``. + fetched first (which may involve multiple WS round-trips for tool calls), + then emitted in fixed-size chunks. """ if reg is None: reg = _default_registry @@ -163,6 +164,3 @@ async def orchestrate_stream( chunk_size = 50 for i in range(0, len(response_text), chunk_size): yield response_text[i : i + chunk_size] - - final = ChatResponse(response=response_text) - yield json.dumps({"done": True, **final.model_dump()})