feat: add WS Gateway and Chat Service (Step 2)

WS Gateway:
- WebSocket lifecycle handler with RS256 JWT auth
- Redis bridge: device registry, frame publishing, tool_result routing
- Inbound routing: tool_result→LPUSH, home/floating→chat pub/sub
- Outbound: subscribes to ws:out:{user_id}, forwards to Electron
- Single-worker Dockerfile (long-lived WS connections)

Chat Service:
- Redis consumer: subscribes to chat:request:* pattern
- Redis-based ws_context: tool_call→publish, BRPOP tool_result (30s timeout)
- deep_agent: single-agent runner with home/floating/stream variants
- memory_middleware: core/associative/episodic/proactive memory with Fernet
- Domain agents: task (8 tools), note (5), project (6), timeline (4)
- LLM factory via LiteLLM (100+ providers)
- Output formatter (StreamFormatter)
- POST /chat REST fallback with Traefik header auth
- Multi-worker Dockerfile with 120s timeout for LLM calls
2026-03-22 01:20:11 +01:00
parent 1e2e395676
commit 90018af311
21 changed files with 2731 additions and 1 deletions
--- a/services/chat/app/llm.py
+++ b/services/chat/app/llm.py
@@ -0,0 +1,77 @@
+"""LLM factory — centralised model instantiation via LiteLLM.
+
+Adapted from app/core/llm.py for the Chat Service.
+Uses shared.config.settings instead of app.config.settings.
+"""
+
+from __future__ import annotations
+
+import os
+import warnings
+
+from openai import AsyncOpenAI
+import litellm
+
+from langchain_openai import ChatOpenAI
+from langchain_litellm import ChatLiteLLM
+
+from shared.config import settings
+
+# Global LiteLLM switch, set once at import time.
+# NOTE(review): per the LiteLLM docs this makes it drop request parameters a
+# provider does not support instead of raising — confirm for the pinned version.
+litellm.drop_params = True
+
+# Suppress the UserWarning whose message matches the regex below (a Pydantic
+# serialization warning mentioning `ResponseAPIUsage`); all other warnings
+# are left untouched.
+warnings.filterwarnings(
+    "ignore",
+    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
+    category=UserWarning,
+)
+
+
+def _api_key_for_model(model: str) -> str | None:
+    if model.startswith("anthropic/"):
+        return settings.ANTHROPIC_API_KEY or None
+    if model.startswith("gemini/") or model.startswith("google/"):
+        return settings.GOOGLE_API_KEY or None
+    if model.startswith("cerebras/"):
+        return settings.CEREBRAS_API_KEY or None
+    if model.startswith("github_copilot/"):
+        return None
+    return settings.OPENAI_API_KEY or None
+
+
+def get_llm(
+    *,
+    model: str | None = None,
+    temperature: float = 0,
+) -> ChatOpenAI | ChatLiteLLM:
+    model = model or settings.LLM_MODEL
+
+    if settings.GITHUB_COPILOT_TOKEN_DIR:
+        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
+
+    if "/" in model:
+        return ChatLiteLLM(model=model, temperature=temperature)
+
+    return ChatOpenAI(
+        model=model,
+        temperature=temperature,
+        api_key=_api_key_for_model(model),
+    )
+
+
+def get_router_llm(
+    *,
+    temperature: float = 0,
+) -> ChatOpenAI | ChatLiteLLM:
+    return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature)
+
+
+async def embed(text: str) -> list[float]:
+    model = settings.LLM_EMBED_MODEL
+
+    if model.startswith("github_copilot/") or "/" in model:
+        response = await litellm.aembedding(model=model, input=[text])
+        return response.data[0]["embedding"]
+
+    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
+    response = await client.embeddings.create(model=model, input=text)
+    return response.data[0].embedding