fix(scouts): fetch single Gmail message instead of bulk in fetch_content

Replace bulk GmailClient.fetch_messages() + linear search with a direct service.users().messages().get(format="full") call. Adds _extract_plain_text_body helper for recursive MIME part walking. Update test to patch _get_gmail_service. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
feat(scouts): add Gmail OAuth scout-setup routes
2026-05-16 05:39:39 +02:00 · 2026-05-16 04:54:10 +02:00 · 2026-05-16 04:36:49 +02:00 · 2026-05-16 04:31:57 +02:00 · 2026-05-16 04:26:16 +02:00 · 2026-05-16 04:18:33 +02:00
216 changed files with 17652 additions and 15476 deletions
--- a/.env.example
+++ b/.env.example
@@ -2,66 +2,94 @@
 ENV=dev
 # ── Database ──────────────────────────────────────────────────────────────────
-DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/adiuva
+DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/adiuvai
-# ── Redis ─────────────────────────────────────────────────────────────────────
+# ── Auth ──────────────────────────────────────────────────────────────────────
-REDIS_URL=redis://localhost:6379/0
+JWT_SECRET=replace-with-a-long-random-secret
-
+JWT_ALGORITHM=HS256
 # ── Auth (JWT RS256) ──────────────────────────────────────────────────────────
 # Generate keypair:
 #   openssl genpkey -algorithm RSA -out private.pem -pkeyopt rsa_keygen_bits:2048
 #   openssl rsa -in private.pem -pubout -out public.pem
 # Paste PEM content with literal \n for newlines.
 #
 # Private key — ONLY used by the Auth Service (JWT signing).
 JWT_PRIVATE_KEY=
 # Public key — used by all services / Traefik ForwardAuth (JWT verification).
 JWT_PUBLIC_KEY=
 JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30
 JWT_REFRESH_TOKEN_EXPIRE_DAYS=30
 # ── LLM ───────────────────────────────────────────────────────────────────────
 # LiteLLM model identifiers — change to swap providers without code changes.
 # Examples: gpt-4o, anthropic/claude-sonnet-4-20250514, gemini/gemini-pro, ollama/llama3
 #
 # API keys — only the key(s) matching your chosen provider(s) are required.
 # The correct key is picked automatically from the model prefix (e.g.
 # "anthropic/..." → ANTHROPIC_API_KEY, "gemini/..." → GOOGLE_API_KEY).
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
 GOOGLE_API_KEY=
-LLM_MODEL=gpt-4o
+CEREBRAS_API_KEY=
 GROQ_API_KEY=
 DEEPSEEK_API_KEY=
 # Default model used by any agent that does not have a specific override below.
 LLM_MODEL=gpt-5-mini
 LLM_EMBED_MODEL=text-embedding-3-small
 # GitHub Copilot — leave empty to use the LiteLLM default token directory.
 # In Docker, point this to a named-volume path so tokens survive restarts.
 # GITHUB_COPILOT_TOKEN_DIR=
 # ── Per-agent model overrides ─────────────────────────────────────────────────
 # Leave a value empty to fall back to LLM_MODEL.
 # Each agent resolves its API key from the model prefix automatically.
 #
 # Intent classifier — routes user messages to the right domain agent.
 # A small/fast model (e.g. gpt-4o-mini) is usually sufficient here.
 LLM_MODEL_CLASSIFIER=
 # Home-agent — handles chat from the home screen (all tools available).
 LLM_MODEL_HOME_AGENT=
 # Floating-agent — handles contextual chat triggered from a task/project/note.
 LLM_MODEL_FLOATING_AGENT=
 # Unified-processor — processes local directory files (local agent runner).
 LLM_MODEL_UNIFIED_PROCESSOR=
 # Cloud-processor — fetches and processes data from cloud connectors.
 LLM_MODEL_CLOUD_PROCESSOR=
 # Brief-agent — produces home and project text briefs.
 # A small model (e.g. gpt-4o-mini) is sufficient.
 # LLM_MODEL_BRIEF_AGENT=
 # Task-brief-agent — per-task deep research (Stage 1 executive assistant).
 # Needs tool-use + reasoning; a capable model recommended (e.g. gpt-4o, gemini-2.5-flash).
 # LLM_MODEL_TASK_BRIEF_AGENT=
 # Setup-agent — guided journey to build an AgentConfig via WebSocket chat.
 LLM_MODEL_SETUP_AGENT=
 # Memory-extractor — Mem0-style extract/decide pipeline (Phase 2).
 # Defaults to gpt-4o-mini when empty (fast + cheap, temperature=0).
 LLM_MODEL_MEMORY_EXTRACTOR=
 # Memory-miner — proactive pattern mining from episodic history (Phase 5, Power+ only).
 # Defaults to gpt-4o-mini when empty.
 LLM_MODEL_MEMORY_MINER=
 # Memory-auditor — weekly contradiction scan + relation label canonicalization (Phase 7).
 # Defaults to LLM_MODEL when empty (a reasoning-capable model is recommended).
 LLM_MODEL_MEMORY_AUDITOR=
 # Scheduler — set to false to disable memory cron jobs (automatically false in tests).
 SCHEDULER_ENABLED=true
 # ── Stripe (leave empty to stub billing) ──────────────────────────────────────
 STRIPE_SECRET_KEY=
 STRIPE_WEBHOOK_SECRET=
 # ── AWS / S3 ──────────────────────────────────────────────────────────────────
 S3_BUCKET=adiuva
 S3_REGION=us-east-1
 S3_ENDPOINT_URL=
 AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
 # For MinIO (homelab): S3_ENDPOINT_URL=http://minio:9000
-# ── Vector Store ──────────────────────────────────────────────────────────────
+# ── Langfuse (leave empty to disable observability) ───────────────────────────
-# Pinecone is used when PINECONE_API_KEY is set; otherwise falls back to Qdrant.
+LANGFUSE_SECRET_KEY=
-PINECONE_API_KEY=
+LANGFUSE_PUBLIC_KEY=
-PINECONE_INDEX=adiuva
+# LANGFUSE_BASE_URL=https://cloud.langfuse.com        # EU (default)
-QDRANT_URL=
+# LANGFUSE_BASE_URL=https://us.cloud.langfuse.com     # US
-QDRANT_API_KEY=
+# LANGFUSE_BASE_URL=http://localhost:3000             # Self-hosted
 # For local Qdrant (homelab): QDRANT_URL=http://qdrant:6333
 # ── CORS ──────────────────────────────────────────────────────────────────────
 # JSON-encoded list of allowed origins parsed by Settings (override default if needed)
 # CORS_ORIGINS=["app://.","http://localhost:3000"]
 # ── Langfuse (observability) ─────────────────────────────────────────────────
 LANGFUSE_SECRET_KEY=sk-lf-...
 LANGFUSE_PUBLIC_KEY=pk-lf-...
 LANGFUSE_HOST=https://cloud.langfuse.com  # or self-hosted URL
 # ── Cloudflare (Traefik ACME DNS-01 challenge) ───────────────────────────────
 CF_DNS_API_TOKEN=
 ACME_EMAIL=
 # ── PostgreSQL (used by docker-compose) ──────────────────────────────────────
 POSTGRES_USER=postgres
 POSTGRES_PASSWORD=postgres
 POSTGRES_DB=adiuva
--- a/.gitea/workflows/deploy.yaml
+++ b/.gitea/workflows/deploy.yaml
@@ -48,23 +48,23 @@ jobs:
          key: ${{ secrets.SSH_KEY }}
          script: |
            set -e
-            DEPLOY_DIR="/opt/adiuva-api"
+            DEPLOY_DIR="/opt/adiuvai-api"
            REPO_URL="http://10.0.0.119:3000/${{ gitea.repository }}.git"
            TAG="${{ gitea.ref_name }}"
            # ── Pull latest code ──
-            cd /tmp && rm -rf adiuva-api-deploy
+            cd /tmp && rm -rf adiuvai-api-deploy
-            git clone --depth 1 --branch "${TAG}" "${REPO_URL}" adiuva-api-deploy
+            git clone --depth 1 --branch "${TAG}" "${REPO_URL}" adiuvai-api-deploy
            # ── Sync source (preserve .env) ──
-            cp -rf /tmp/adiuva-api-deploy/app/ \
+            cp -rf /tmp/adiuvai-api-deploy/app/ \
-                   /tmp/adiuva-api-deploy/alembic/ \
+                   /tmp/adiuvai-api-deploy/alembic/ \
-                   /tmp/adiuva-api-deploy/alembic.ini \
+                   /tmp/adiuvai-api-deploy/alembic.ini \
-                   /tmp/adiuva-api-deploy/Dockerfile \
+                   /tmp/adiuvai-api-deploy/Dockerfile \
-                   /tmp/adiuva-api-deploy/docker-compose.yml \
+                   /tmp/adiuvai-api-deploy/docker-compose.yml \
-                   /tmp/adiuva-api-deploy/requirements.txt \
+                   /tmp/adiuvai-api-deploy/requirements.txt \
                   "$DEPLOY_DIR/"
-            rm -rf /tmp/adiuva-api-deploy
+            rm -rf /tmp/adiuvai-api-deploy
            # ── Verify .env ──
            if [ ! -f "$DEPLOY_DIR/.env" ]; then
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -58,7 +58,7 @@ jobs:
      - uses: actions/checkout@v4
      - name: Build image
-        run: docker build -t adiuva-api:ci .
+        run: docker build -t adiuvai-api:ci .
      - name: Verify gunicorn installed
-        run: docker run --rm adiuva-api:ci gunicorn --version
+        run: docker run --rm adiuvai-api:ci gunicorn --version
--- a/.gitignore
+++ b/.gitignore
@@ -13,9 +13,6 @@ env/
 # Environment variables
 .env
 # Cryptographic keys
 *.pem
 # IDE
 .vscode/
 .idea/
@@ -24,17 +21,18 @@ env/
 .pytest_cache/
 htmlcov/
 .coverage
 tests/fixtures/private*/
 # Docker
 *.log
 # OS
 .DS_Store
 # Smoke scripts (dev-only, not for CI)
 scripts/smoke_*.py
 Thumbs.db
 # Claude Code
 .claude/
 logs/
 # Eval private test data
 services/batch-agent/eval/fixtures/private_data/
--- a/services/chat/Dockerfile
+++ b/services/chat/Dockerfile
@@ -3,34 +3,37 @@ FROM python:3.12-slim AS builder
 WORKDIR /build
-COPY services/chat/requirements.txt ./requirements.txt
+COPY requirements.txt .
 RUN pip install --upgrade pip && \
    pip install --no-cache-dir --prefix=/install -r requirements.txt
 # ── runtime ──────────────────────────────────────────────────────────────────
 FROM python:3.12-slim AS runtime
 # Non-root user
 RUN addgroup --system appgroup && adduser --system --ingroup appgroup appuser
 WORKDIR /app
 # Copy installed packages from builder
 COPY --from=builder /install /usr/local
-# Shared module
+# Copy application source
-COPY shared/ shared/
+COPY app/ app/
-# Service source
+# Copy Alembic migration files
-COPY services/chat/app/ app/
+COPY alembic/ alembic/
 COPY alembic.ini .
 # Ensure appuser owns the working directory
 RUN chown -R appuser:appgroup /app
 USER appuser
 EXPOSE 8000
 # Chat service requests spend most of their time waiting on LLM calls — use multiple workers
 CMD ["gunicorn", "app.main:app", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "--bind", "0.0.0.0:8000", \
-     "--workers", "2", \
+     "--workers", "4", \
     "--timeout", "120"]
--- a/README.md
+++ b/README.md
@@ -1,793 +1,5 @@
-# Adiuva Cloud API
+## DEV
-
+Run in DEV with command:
 **AI-powered project management backend with E2E encrypted cloud storage, LLM orchestration, and a plugin marketplace.**
 Built with FastAPI · Python 3.12 · PostgreSQL · LangChain · Stripe · AWS S3
 ---
 ## Table of Contents
 - [Overview](#overview)
 - [Architecture](#architecture)
 - [Key Features](#key-features)
 - [Tech Stack](#tech-stack)
 - [Getting Started](#getting-started)
 - [Docker Deployment](#docker-deployment)
 - [Environment Variables](#environment-variables)
 - [API Reference](#api-reference)
 - [Data Model](#data-model)
 - [AI Agent System](#ai-agent-system)
 - [Orchestration & Execution Plans](#orchestration--execution-plans)
 - [Middleware](#middleware)
 - [Storage Layer](#storage-layer)
 - [Billing & Tiers](#billing--tiers)
 - [Plugin Marketplace](#plugin-marketplace)
 - [Testing](#testing)
 - [Project Structure](#project-structure)
 - [License](#license)
 ---
 ## Overview
 Adiuva Cloud API is the FastAPI backend that powers the **Adiuva Electron desktop app**. It provides LLM-powered chat orchestration, end-to-end encrypted cloud storage, a vector search engine, an encrypted backup system, a plugin marketplace with revenue sharing, and Stripe-based subscription billing across four tiers.
 ### Design Principles
 1. **Never persist user data in plaintext** — the database stores only auth, billing, storage metadata, and marketplace data. All user content is E2E encrypted by the client before reaching the server.
 2. **Never expose prompts** — system prompts stay server-side; responses are sanitized to strip any leaked prompt fragments.
 3. **Never decrypt user blobs** — the backend performs only checksum verification; no decryption keys ever reach the server.
 4. **Stateless request handling** — all context comes from the client and JWT; no server-side session state.
 5. **Tier gates enforced server-side** — the server always reads the current tier from the database, never trusting client-reported values.
 ---
 ## Architecture
 ```
-┌──────────────┐      ┌────────────────────────────────────────────────────────┐
+uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --log-config logging.conf
-│  Electron    │      │  FastAPI  (Uvicorn / Gunicorn)                         │
+```
 │  Desktop App │────▶│                                                        │
 │  (Client)    │◀────│  Middleware: RateLimit → Sanitizer → CORS → Router     │
 └──────────────┘      │                                                        │
                      │  ┌──────────────────┐  ┌────────────────────────────┐  │
                      │  │  Auth Routes     │  │  Chat Routes               │  │
                      │  │  Billing Routes  │  │    ↓                       │  │
                      │  │  Storage Routes  │  │  Orchestrator (GPT-4o-mini)│  │
                      │  │  Backup Routes   │  │    ↓ classify intent       │  │
                      │  │  Plugin Routes   │  │  Agent Registry            │  │
                      │  │  Vector Routes   │  │    ↓                       │  │
                      │  │  Plans Routes    │  │  TaskAgent  | ProjectAgent │  │
                       │  └──────────────────┘  │  NoteAgent  | TimelineAgent│  │
                      │                        │  (GPT-4o + LangChain)      │  │
                      │                        └────────────────────────────┘  │
                      └────────────────────────────────────────────────────────┘
                               │              │              │
                      ┌────────▼───┐  ┌───────▼───────┐  ┌──▼─────────────┐
                      │ PostgreSQL │  │  AWS S3       │  │ Pinecone /     │
                      │ (Auth,     │  │  (E2E blobs,  │  │ Qdrant         │
                      │  Billing,  │  │   backups)    │  │ (Vectors)      │
                      │  Metadata) │  └───────────────┘  └────────────────┘
                      └────────────┘
                               │
                      ┌────────▼───┐
                      │  Stripe    │
                      │  (Billing, │
                      │   Connect) │
                      └────────────┘
 ```
 ---
 ## Key Features
 1. **LLM-powered orchestration** — GPT-4o-mini classifies user intent and routes to the appropriate domain agent.
 2. **4 specialized AI agents** — Tasks (8 tools), Projects (6 tools), Timelines (4 tools), Notes (5 tools), all powered by GPT-4o via LangChain.
 3. **Execution plans & playbooks** — Server-side prompt template registry; clients receive only opaque template IDs, never raw prompts.
 4. **E2E encrypted cloud storage** — The backend never decrypts user data; SHA-256 checksum verification uses constant-time comparison to prevent timing attacks.
 5. **Cloud vector store** — Pinecone or Qdrant with user-isolated namespaces and encrypted blob payloads.
 6. **Encrypted backup system** — Tiered storage limits with `If-Modified-Since` support for efficient syncing.
 7. **Plugin marketplace** — Catalog, admin review/approval workflow, security checklist, and 70/30 revenue sharing via Stripe Connect.
 8. **Stripe billing** — Four-tier subscription model (Free / Pro / Power / Team) with checkout sessions and full webhook lifecycle handling.
 9. **JWT authentication** — Access + refresh tokens with bcrypt password hashing, SHA-256 token hashing, and automatic rotation.
 10. **Prompt IP protection** — Sanitizer middleware strips system prompts, reasoning markers, tool schemas, and agent routing metadata from all chat responses.
 11. **Tier-based rate limiting** — Sliding-window per-user limiter scaling from 20 to 200 requests/min by subscription tier.
 12. **Zero-trust data model** — User content is never stored in plaintext; the database holds only authentication, billing, and metadata records.
 13. **WebSocket streaming** — Real-time chat with 30-second heartbeat keep-alive and chunked text delivery.
 14. **Alembic migrations** — Versioned schema management with seed data for the plugin marketplace.
 15. **Comprehensive test suite** — In-memory SQLite + moto S3 mocks, per-tier test fixtures, and full API coverage without external dependencies.
 ---
 ## Tech Stack
 | Package | Version | Purpose |
 |---|---|---|
 | `fastapi` | ≥ 0.115.0 | Web framework |
 | `uvicorn[standard]` | ≥ 0.34.0 | ASGI development server |
 | `gunicorn` | ≥ 22.0.0 | Production process manager |
 | `langchain` | ≥ 0.3.0 | LLM orchestration framework |
 | `langchain-openai` | ≥ 0.3.0 | OpenAI LLM provider integration |
 | `litellm` | ≥ 1.50.0 | Universal LLM gateway (100+ providers) |
 | `pydantic` | ≥ 2.10.0 | Data validation and serialization |
 | `pydantic-settings` | ≥ 2.7.0 | Environment-based configuration |
 | `python-jose[cryptography]` | ≥ 3.3.0 | JWT encoding and decoding |
 | `stripe` | ≥ 11.0.0 | Billing and payment integration |
 | `boto3` | ≥ 1.35.0 | AWS S3 client |
 | `slowapi` | ≥ 0.1.9 | Rate limiting utilities |
 | `sqlalchemy` | ≥ 2.0.0 | Async ORM and query builder |
 | `asyncpg` | ≥ 0.30.0 | PostgreSQL async driver |
 | `alembic` | ≥ 1.14.0 | Database migration management |
 | `bcrypt` | ≥ 4.2.0 | Password hashing |
 | `python-dotenv` | ≥ 1.0.0 | `.env` file loading |
 | `httpx` | ≥ 0.28.0 | Async HTTP client (used in tests) |
 | `websockets` | ≥ 14.0 | WebSocket protocol support |
 | `psycopg2-binary` | ≥ 2.9.0 | Synchronous PostgreSQL driver (Alembic) |
 | `pinecone` | ≥ 5.0.0 | Pinecone vector store client |
 | `qdrant-client` | ≥ 1.7.0 | Qdrant vector store client |
 | `pytest` | ≥ 8.0.0 | Test framework |
 | `pytest-asyncio` | ≥ 0.24.0 | Async test support |
 | `aiosqlite` | ≥ 0.20.0 | In-memory SQLite for tests |
 | `moto[s3]` | ≥ 5.0.0 | AWS S3 mock for tests |
 | `ruff` | ≥ 0.8.0 | Linter and formatter |
 ---
 ## Getting Started
 ### Prerequisites
 - Python 3.12+
 - PostgreSQL 16+
 - An OpenAI API key (for LLM features)
 - Stripe API keys (optional — billing stubs gracefully when unconfigured)
 - AWS credentials (optional — needed for S3 storage in production)
 ### Installation
 ```bash
 # Clone the repository
 git clone <repo-url> && cd adiuva-api
 # Create a virtual environment
 python -m venv .venv && source .venv/bin/activate
 # Install dependencies
 pip install -r requirements.txt
 # Configure environment
 cp .env.example .env
 # Edit .env with your DATABASE_URL, OPENAI_API_KEY, etc.
 ```
 ### Database Setup
 ```bash
 # Start PostgreSQL (or use the Docker Compose database)
 docker compose up db -d
 # Run migrations
 alembic upgrade head
 ```
 ### Run the Development Server
 ```bash
 uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
 ```
 Interactive API docs are available at [http://localhost:8000/docs](http://localhost:8000/docs) in development mode (`ENV=dev`). The `/docs` endpoint is disabled in production.
 ---
 ## Docker Deployment
 ### Quick Start
 ```bash
 docker compose up --build
 ```
 This starts two services:
 - **app** — FastAPI server on port `8000`
 - **db** — PostgreSQL 16 (Alpine) on port `5432` with a persistent volume and health checks
 The compose file also includes optional services for fully local deployments:
 - **minio** — S3-compatible object storage on ports `9000` (API) and `9001` (console)
 - **qdrant** — Vector search engine on ports `6333` (HTTP) and `6334` (gRPC)
 ### Dockerfile Details
 The Dockerfile uses a multi-stage build:
 1. **Builder stage** — Installs Python dependencies into a virtual environment.
 2. **Runtime stage** — Copies only the venv, app source, and Alembic migrations. Runs as a non-root user (`appuser`).
 3. **Production server** — Gunicorn with 4 Uvicorn workers, 120-second timeout, listening on port 8000.
 ```bash
 # Production command (run by the container)
 gunicorn app.main:app -k uvicorn.workers.UvicornWorker -w 4 --timeout 120 -b 0.0.0.0:8000
 ```
 ---
 ## Homelab / Self-Hosted Deployment
 You can run the entire stack locally on a homelab with **no cloud dependencies except the LLM provider**. The compose file includes MinIO (S3 replacement) and Qdrant (vector store) out of the box.
 ### 1. Start all services
 ```bash
 docker compose up -d
 ```
 This starts PostgreSQL, MinIO, and Qdrant alongside the app.
 ### 2. Create the MinIO bucket
 Open the MinIO console at [http://localhost:9001](http://localhost:9001) (login: `minioadmin` / `minioadmin`) and create a bucket named `adiuva`, or use the CLI:
 ```bash
 docker compose exec minio mc alias set local http://localhost:9000 minioadmin minioadmin
 docker compose exec minio mc mb local/adiuva
 ```
 ### 3. Configure your `.env`
 ```bash
 # Database (uses the compose PostgreSQL)
 DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/adiuva
 # S3 → MinIO
 S3_BUCKET=adiuva
 S3_REGION=us-east-1
 S3_ENDPOINT_URL=http://minio:9000
 AWS_ACCESS_KEY_ID=minioadmin
 AWS_SECRET_ACCESS_KEY=minioadmin
 # Vector store → local Qdrant (leave PINECONE_API_KEY empty)
 QDRANT_URL=http://qdrant:6333
 QDRANT_API_KEY=
 PINECONE_API_KEY=
 # Billing — leave empty to stub (no Stripe needed)
 STRIPE_SECRET_KEY=
 STRIPE_WEBHOOK_SECRET=
 # LLM — the only external service
 OPENAI_API_KEY=sk-...
 LLM_MODEL=gpt-4o
 LLM_ROUTER_MODEL=gpt-4o-mini
 # Auth
 JWT_SECRET=your-secret-here
 ENV=dev
 ```
 ### 4. Run migrations
 ```bash
 docker compose exec app alembic upgrade head
 ```
 ### What runs where
 | Service | Runs on | Port | Notes |
 |---|---|---|---|
 | FastAPI app | Docker | 8000 | API server |
 | PostgreSQL | Docker | 5432 | Auth, billing, metadata |
 | MinIO | Docker | 9000 / 9001 | S3-compatible blob & backup storage |
 | Qdrant | Docker | 6333 / 6334 | Vector search (replaces Pinecone) |
 | Stripe | — | — | Stubbed when keys are empty |
 | OpenAI / LLM | Cloud | — | Only external dependency |
 > **Want fully offline AI too?** Set `LLM_MODEL=ollama/llama3` and `LLM_ROUTER_MODEL=ollama/llama3`, then add an Ollama container or point at a local Ollama instance. See the [LLM provider switching](#switching-llm-providers) section.
 ---
 ## Environment Variables
 All variables are loaded from a `.env` file via Pydantic Settings. Source: `app/config/settings.py`
 | Variable | Type | Default | Description |
 |---|---|---|---|
 | `DATABASE_URL` | `str` | `postgresql+asyncpg://postgres:postgres@localhost:5432/adiuva` | Async SQLAlchemy connection string |
 | `JWT_SECRET` | `str` | `change-me-in-production` | HMAC secret for JWT signing |
 | `JWT_ALGORITHM` | `str` | `HS256` | JWT signing algorithm |
 | `JWT_ACCESS_TOKEN_EXPIRE_MINUTES` | `int` | `30` | Access token time-to-live |
 | `JWT_REFRESH_TOKEN_EXPIRE_DAYS` | `int` | `30` | Refresh token time-to-live |
 | `STRIPE_SECRET_KEY` | `str` | `""` | Stripe API key (empty = stub mode) |
 | `STRIPE_WEBHOOK_SECRET` | `str` | `""` | Stripe webhook signature secret |
 | `S3_BUCKET` | `str` | `""` | S3 bucket for encrypted blobs and backups |
 | `S3_REGION` | `str` | `us-east-1` | AWS region |
 | `S3_ENDPOINT_URL` | `str` | `""` | Custom S3 endpoint (e.g. `http://minio:9000` for MinIO). Leave empty for AWS. |
 | `AWS_ACCESS_KEY_ID` | `str` | `""` | AWS credentials |
 | `AWS_SECRET_ACCESS_KEY` | `str` | `""` | AWS credentials |
 | `PINECONE_API_KEY` | `str` | `""` | Pinecone API key (if set, Pinecone is used for vectors) |
 | `PINECONE_INDEX` | `str` | `adiuva` | Pinecone index name |
 | `QDRANT_URL` | `str` | `""` | Qdrant URL (used when Pinecone is not configured) |
 | `QDRANT_API_KEY` | `str` | `""` | Qdrant API key |
 | `OPENAI_API_KEY` | `str` | `""` | OpenAI key for LLM agent calls |
 | `LLM_MODEL` | `str` | `gpt-4o` | LiteLLM model identifier for agents (e.g. `anthropic/claude-3.5-sonnet`, `gemini/gemini-pro`, `ollama/llama3`) |
 | `LLM_ROUTER_MODEL` | `str` | `gpt-4o-mini` | Lighter model used for intent classification / routing |
 | `CORS_ORIGINS` | `list[str]` | `["app://.", "http://localhost:3000", "http://localhost:5173"]` | Allowed CORS origins |
 | `ENV` | `Literal` | `dev` | `dev` or `prod` — controls `/docs` visibility and SQL echo |
 ---
 ## API Reference
 All routes are prefixed with `/api/v1`. **29 endpoints** total (27 REST + 1 WebSocket + 1 health check).
 ### Health
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `GET` | `/api/v1/health` | No | Returns `{"status": "ok", "version": "0.1.0"}` |
 ### Auth
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `POST` | `/api/v1/auth/register` | No | Create account with bcrypt-hashed password, returns `AuthTokens` |
 | `POST` | `/api/v1/auth/login` | No | Validate credentials, returns `AuthTokens` |
 | `POST` | `/api/v1/auth/refresh` | No | Rotate refresh token, returns new `AuthTokens` |
 | `GET` | `/api/v1/auth/me` | JWT | Returns `UserProfile` for the authenticated user |
 ### Chat
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `POST` | `/api/v1/chat` | JWT | Route message through the orchestrator; returns `ChatResponse` or `ExecutionPlan` depending on execution mode |
 | `WS` | `/api/v1/chat/stream` | JWT (query param `?token=`) | Streaming chat — first frame is a `ChatRequest`, server yields text chunks, final frame is `{"done": true, "response": "...", "actions": [...]}`. 30-second heartbeat ping. |
 ### Plans
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `GET` | `/api/v1/plans/playbook` | JWT | List all cached execution plan playbooks |
 | `GET` | `/api/v1/plans/playbook/{plan_id}` | JWT | Retrieve a specific playbook by ID |
 ### Storage (Cloud Records)
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `POST` | `/api/v1/storage/records` | JWT | Upload an E2E encrypted record (verifies checksum, enforces storage quota) |
 | `GET` | `/api/v1/storage/records` | JWT | List record metadata with pagination (`?table`, `?page`, `?limit`); no blob bytes returned |
 | `GET` | `/api/v1/storage/records/{id}` | JWT | Download encrypted blob with `X-Checksum` response header |
 | `PUT` | `/api/v1/storage/records/{id}` | JWT | Replace an existing blob (verifies checksum, enforces quota) |
 | `DELETE` | `/api/v1/storage/records/{id}` | JWT | Delete a record and its S3 blob |
 ### Vectors (Cloud Vector Store)
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `POST` | `/api/v1/storage/vectors/upsert` | JWT | Verify checksums and upsert encrypted vectors |
 | `POST` | `/api/v1/storage/vectors/search` | JWT | Search user-scoped vector namespace |
 | `DELETE` | `/api/v1/storage/vectors` | JWT | Delete vectors by ID list |
 ### Backup
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `PUT` | `/api/v1/backup` | JWT | Upload encrypted backup blob with custom headers (`X-Backup-Version`, `X-Backup-Timestamp`, `X-Backup-Checksum`). Tier quota enforced. |
 | `GET` | `/api/v1/backup` | JWT | Download latest backup blob. Supports `If-Modified-Since`. |
 | `GET` | `/api/v1/backup/history` | JWT | List backup metadata (no blob content) |
 | `DELETE` | `/api/v1/backup/{backup_id}` | JWT | Delete a specific backup |
 ### Plugins (Marketplace)
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `GET` | `/api/v1/plugins` | JWT (Power+) | Browse the marketplace (`?category`, `?q`, `?page`, `?sort=rating\|installs\|newest`) |
 | `GET` | `/api/v1/plugins/{id}` | JWT (Power+) | Plugin detail with install count and ratings |
 | `POST` | `/api/v1/plugins/{id}/install` | JWT (Power+) | Install plugin; triggers Stripe Connect revenue split for paid plugins |
 | `DELETE` | `/api/v1/plugins/{id}/install` | JWT | Uninstall plugin |
 ### Billing
 | Method | Path | Auth | Description |
 |---|---|---|---|
 | `POST` | `/api/v1/billing/checkout` | JWT | Create a Stripe checkout session, returns `{"checkout_url": "..."}` |
 | `POST` | `/api/v1/billing/webhook` | Stripe signature | Handle Stripe events: `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, `invoice.payment_failed` |
 | `GET` | `/api/v1/billing/subscription` | JWT | Get current subscription information |
 | `DELETE` | `/api/v1/billing/subscription` | JWT | Cancel subscription and revert to free tier |
 ---
 ## Data Model
 9 tables managed by Alembic migrations. Source: `app/models.py`
 ### Tables
 | Table | Primary Key | Key Columns | Purpose |
 |---|---|---|---|
 | `users` | `id` (UUID) | `email` (unique), `password_hash`, `tier`, `stripe_customer_id`, timestamps | User accounts |
 | `refresh_tokens` | `id` (UUID) | `user_id` (FK), `token_hash` (SHA-256, unique), `expires_at` | Hashed refresh tokens for rotation |
 | `subscriptions` | `id` (UUID) | `user_id` (FK, unique), `stripe_subscription_id`, `tier`, `status`, `current_period_end` | Stripe subscription records |
 | `storage_records` | `id` (UUID) | `user_id` (FK), `table_name`, `s3_key`, `checksum`, `size_bytes`, timestamps | S3 blob metadata (no plaintext content) |
 | `backup_metadata` | `id` (UUID) | `user_id` (FK), `s3_key`, `version`, `timestamp`, `checksum`, `size_bytes` | Backup manifests |
 | `plugins` | `id` (String) | `name`, `description`, `version`, `author_id` (FK), `category`, `price_cents`, `permissions` (JSON), `status`, `s3_package_key`, `install_count`, `avg_rating` | Marketplace plugin catalog |
 | `plugin_installations` | `id` (UUID) | `plugin_id` (FK), `user_id` (FK), unique constraint on (`plugin_id`, `user_id`) | Per-user install tracking |
 | `plugin_reviews` | `id` (UUID) | `plugin_id` (FK), `reviewer_id` (FK), `decision`, `notes`, `reviewed_at` | Admin review decisions |
 | `revenue_events` | `id` (UUID) | `plugin_id` (FK), `user_id` (FK), `amount_cents`, `developer_share_cents`, `stripe_transfer_id` | 70/30 revenue split ledger |
 ### Enum Types
 | Enum | Values |
 |---|---|
 | `billing_tier` | `free`, `pro`, `power`, `team` |
 | `plugin_status` | `pending_review`, `approved`, `rejected` |
 | `review_decision` | `approved`, `rejected` |
 ### Migrations
 | Version | Description |
 |---|---|
 | `001_initial_schema` | Creates all 9 tables with indexes and foreign key constraints |
 | `002_seed_plugins` | Seeds 3 approved plugins: GitHub Sync (free), Slack Notifier (€4.99), Time Tracker (€9.99) |
 ---
 ## AI Agent System
 The agent system uses a registry pattern with LangChain tool-calling agents powered by GPT-4o. Source: `app/agents/`, `app/core/agent_registry.py`
 ### Architecture
 - **`BaseAgent`** — Abstract base with `user_id`, `shared_memory`, and `vector_store_context`.
 - **`ChatAgent(BaseAgent)`** — Abstract `handle(query, context)` and `get_tools()` methods, plus a shared `_tool_loop(llm, messages, tools, max_iter=5)` for iterative tool calling.
 - **`AgentRegistry`** — Singleton registry with `@register` decorator, `get(name)`, `list_agents()`, and `call_agent(name, query, context)`.
 ### Registered Agents
 | Agent | Registry Name | Tools | Description |
 |---|---|---|---|
 | **TaskAgent** | `task_agent` | 8 | Full task and comment CRUD. Status: `todo` / `in_progress` / `done`. Priority: `high` / `medium` / `low`. Tools: `list_tasks`, `create_task`, `update_task`, `delete_task`, `list_tasks_due_today`, `list_task_comments`, `add_task_comment`, `delete_task_comment` |
 | **ProjectAgent** | `project_agent` | 6 | Project lifecycle management. Status: `active` / `archived`. Prefers archiving over deletion. Tools: `list_projects`, `list_all_projects`, `get_project`, `create_project`, `update_project`, `delete_project` |
 | **TimelineAgent** | `timeline_agent` | 4 | Project milestones. Requires `project_id` for creation. Supports AI-suggestion and approval workflows. Tools: `list_timelines`, `create_timeline`, `update_timeline`, `delete_timeline` |
 | **NoteAgent** | `note_agent` | 5 | Markdown note management. Optionally linked to projects. Tools: `list_notes`, `get_note`, `create_note`, `update_note`, `delete_note` |
 All agents use the model configured by `LLM_MODEL` (default: GPT-4o) with `temperature=0` via LiteLLM. Tools return JSON action descriptors that the Electron client interprets and applies locally.
 ### Switching LLM Providers
 The backend uses **LiteLLM** as a universal LLM gateway. All agents and the orchestrator instantiate models through a centralized factory in `app/core/llm.py`. To switch providers, change environment variables — no code changes required:
 ```bash
 # OpenAI (default)
 LLM_MODEL=gpt-4o
 LLM_ROUTER_MODEL=gpt-4o-mini
 # Anthropic
 LLM_MODEL=anthropic/claude-3.5-sonnet
 LLM_ROUTER_MODEL=anthropic/claude-3-haiku
 # Google Gemini
 LLM_MODEL=gemini/gemini-pro
 LLM_ROUTER_MODEL=gemini/gemini-flash
 # Local Ollama
 LLM_MODEL=ollama/llama3
 LLM_ROUTER_MODEL=ollama/llama3
 # AWS Bedrock
 LLM_MODEL=bedrock/anthropic.claude-v2
 LLM_ROUTER_MODEL=bedrock/anthropic.claude-instant-v1
 ```
 See the [LiteLLM provider docs](https://docs.litellm.ai/docs/providers) for the full list of 100+ supported providers and model naming conventions.
 ---
 ## Orchestration & Execution Plans
 Source: `app/core/orchestrator.py`, `app/core/execution_plan.py`
 ### Orchestrator
 1. **`classify_intent(message, context, registry)`** — Uses the router model (`LLM_ROUTER_MODEL`, default: GPT-4o-mini) to determine which agent should handle a message. Falls back to `task_agent` when classification is ambiguous.
 2. **`route_single(agent_name, message, context)`** — Routes to a single agent and returns a `ChatResponse`.
 3. **`route_pipeline(agent_names, message, context)`** — Executes agents sequentially; each receives `previous_results` from earlier agents. A final LLM synthesis step merges all results.
 4. **`orchestrate(request)`** — Main entry point. In `direct` mode, returns a `ChatResponse`. In `plan` mode, returns an `ExecutionPlan`.
 5. **`orchestrate_stream(request)`** — Streaming variant that yields 50-character text chunks with a final JSON frame.
 ### Execution Plans
 - **`PromptTemplateRegistry`** — Maps template IDs to server-side prompt text. Clients only ever see opaque IDs, never raw prompts.
 - **`ExecutionPlanBuilder`** — Fluent builder API: `add_step()`, `add_llm_step(template_id, vars)`, `add_data_step(action, data_from_step)`. Validates step references on `build()`.
 - **`PlanCache`** — LRU cache (maxsize 1000) for storing plans as reusable playbooks.
 ### Built-in Templates (6)
 `tpl_task_agent_default`, `tpl_timeline_agent_default`, `tpl_project_agent_default`, `tpl_note_agent_default`, `tpl_task_extract_from_project`, `tpl_note_weekly_summary`
 ### Built-in Playbooks (2)
 | Playbook | Description |
 |---|---|
 | `create_tasks_from_project` | LLM extracts actionable tasks from project context, then creates task records |
 | `generate_weekly_note` | LLM generates a weekly summary, then creates a note record |
 ---
 ## Middleware
 Middleware executes in this order on each request: **TierRateLimit → Sanitizer → CORS → Router**
 ### JWT Authentication
 Source: `app/api/middleware/auth.py`
 - FastAPI dependency `get_current_user` validates the `Bearer` JWT and extracts `user_id` and `email`.
 - **Live tier lookup** — The current tier is fetched from the `subscriptions` table on every request (not cached in the JWT), so upgrades and downgrades take immediate effect.
 - Falls back to `free` when no subscription row exists.
 - Raises `401 Unauthorized` on invalid or expired tokens.
 - **Exempt paths:** `/api/v1/auth/register`, `/api/v1/auth/login`, `/api/v1/billing/webhook`
 ### Tier-Based Rate Limiter
 Source: `app/api/middleware/rate_limit.py`
 - `TierRateLimitMiddleware` — Sliding-window in-process rate limiter (no Redis dependency).
 - Per-user 60-second window sized by subscription tier:
 | Tier | Requests / Minute |
 |---|---|
 | Free | 20 |
 | Pro | 60 |
 | Power | 120 |
 | Team | 200 |
 - Returns `429 Too Many Requests` with a `Retry-After` header when the limit is exceeded.
 - **Exempt paths:** register, login, webhook, health
 ### Response Sanitizer
 Source: `app/api/middleware/sanitizer.py`
 - Runs only on `/api/v1/chat` endpoints.
 - Scans JSON response bodies and replaces leaked prompt IP fragments with `[REDACTED]`.
 - Detects: system prompt openers, agent routing metadata, LangChain tool schemas, internal reasoning markers (`<thinking>`, `[INST]`), and known prompt fingerprints.
 - Logs sanitization events as `WARNING`.
 - Binary responses (storage, backup) are never touched.
 ---
 ## Storage Layer
 ### Blob Store
 Source: `app/storage/blob_store.py`
 - S3-backed storage for E2E encrypted blobs.
 - Object keys follow the pattern: `{user_id}/{table}/{record_id}`
 - Server-side SSE-S3 encryption at rest (additional layer on top of client-side E2E encryption).
 - Methods: `upload()`, `download()`, `delete()` (idempotent), `list_keys()`
 - The backend **never inspects or decrypts blob content**.
 ### Vector Store
 Source: `app/storage/vector_store.py`
 - Runtime-configurable: **Pinecone** (when `PINECONE_API_KEY` is set) or **Qdrant** (fallback).
 - User isolation: Pinecone uses `namespace=user_id`; Qdrant filters by `user_id` payload field.
 - 32-dimensional SHA-256-derived float vectors (deterministic, not semantically meaningful on encrypted data — a documented trade-off for privacy).
 - Encrypted blobs are stored as base64 in metadata/payload for verbatim retrieval.
 - Methods: `upsert()`, `search()`, `delete()`
 ### Encryption Utilities
 Source: `app/storage/encryption.py`
 - `verify_checksum(blob, checksum)` — SHA-256 hash comparison using `hmac.compare_digest` (constant-time to prevent timing attacks).
 - `reject_if_tampered(blob, checksum)` — Raises HTTP 400 on checksum mismatch.
 - **No decryption key ever reaches the backend.**
 ---
 ## Billing & Tiers
 Source: `app/billing/stripe_service.py`, `app/billing/tier_manager.py`
 ### Feature Matrix
 | Feature | Free | Pro | Power | Team |
 |---|---|---|---|---|
 | AI Agents | 3 | Unlimited | Unlimited | Unlimited |
 | Batch Active | 2 | 10 | Unlimited | Unlimited |
 | Cloud Storage | 0 GB | 5 GB | 25 GB | Unlimited |
 | Backup Storage | 0 GB | 5 GB | 25 GB | Unlimited |
 | LLM Providers | 1 | Unlimited | Unlimited | Unlimited |
 | Batch Builder | — | — | ✓ | ✓ |
 | Plugin Marketplace | — | — | ✓ | ✓ |
 | SSO | — | — | — | ✓ |
 | Rate Limit | 20 req/min | 60 req/min | 120 req/min | 200 req/min |
 ### Stripe Integration
 - **Checkout** — `create_checkout_session(user_id, tier)` creates a Stripe Checkout session. Returns a stub URL when Stripe is not configured.
 - **Webhooks** — Handles `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, and `invoice.payment_failed`.
 - **Subscription management** — `get_subscription()` returns the current subscription record; `cancel_subscription()` cancels via the Stripe API and reverts the user to the free tier.
 - **Price IDs:** `price_pro_monthly`, `price_power_monthly`, `price_team_monthly`
 ### Tier Manager
 - `get_tier(user_id)` — Returns the user's current billing tier.
 - `check_feature(tier, feature)` — Boolean feature gate check.
 - `require_feature(tier, feature)` — Raises HTTP 403 if the feature is not available.
 - `enforce_quota(user_id, tier)` / `enforce_backup_quota(user_id, tier)` — Raises HTTP 402 if storage limits are exceeded.
 ---
 ## Plugin Marketplace
 Source: `app/marketplace/`
 ### Plugin Registry
 - PostgreSQL-backed catalog of submitted and approved plugins.
 - `list_plugins(db, category, query, page, sort)` — Paginated listing (page size: 20) with optional filtering by category, text search, and sorting by `rating`, `installs`, or `newest`.
 - `get_plugin(db, plugin_id)` — Full manifest with install count and ratings.
 - `submit_plugin(db, manifest, s3_key)` — Submits a plugin with `pending_review` status.
 - `approve_plugin()` / `reject_plugin(reason)` — Admin workflow for plugin approval.
 - `record_install()` / `record_uninstall()` — Tracks per-user installations and updates install counts.
 ### Review Queue
 - Automated security checklist before human review:
  - Plugin ID must match `^[a-z0-9-]+$`
  - Permissions must be from the allowed set only
  - No binary blobs in the manifest
 - **Allowed permissions:** `read:tasks`, `write:tasks`, `read:projects`, `write:projects`, `read:notes`, `write:notes`, `read:timelines`, `write:timelines`, `read:calendar`, `write:calendar`
 - `get_pending(db)` — Lists plugins awaiting review.
 - `submit_review(db, plugin_id, reviewer_id, decision, notes)` — Records the review decision.
 ### Revenue Sharing
 - **70% developer / 30% platform** split on all paid plugin sales.
 - `record_install(db, plugin_id, user_id, amount_cents)` — Records the revenue event and triggers a Stripe Connect transfer for the developer share.
 - `get_earnings(db, developer_id, period)` — Aggregated earnings report for plugin developers.
 - Gracefully stubs transfers when Stripe is not configured.
 ### Seed Plugins
 | Plugin | Category | Price |
 |---|---|---|
 | GitHub Sync | Productivity | Free |
 | Slack Notifier | Communication | €4.99 |
 | Time Tracker | Productivity | €9.99 |
 ---
 ## Testing
 ### Running Tests
 ```bash
 # Run all tests
 pytest
 # Run a specific test file
 pytest tests/test_auth.py
 # Run with verbose output
 pytest -v
 ```
 ### Test Infrastructure
 - **Database:** Async SQLite in-memory via `aiosqlite` + `StaticPool` — fast, no PostgreSQL needed.
 - **S3 mock:** `moto[s3]` with a fixture that patches `BlobStore` settings.
 - **Auth helpers:** `make_jwt(tier)` and `auth_header(tier)` generate per-tier test tokens.
 - **Seed data:** Auto-creates one `User` + `Subscription` per tier (free/pro/power/team) before each test.
 - **Plugin seeds:** Fixture adds 3 approved plugins for marketplace tests.
 - **FK enforcement:** SQLite `PRAGMA foreign_keys=ON`.
 - **No external dependencies** — all tests run fully offline.
 ### Test Coverage
 | File | Coverage |
 |---|---|
 | `test_auth.py` | Register, login, token access, refresh, expiration |
 | `test_orchestrator.py` | Intent classification, single agent routing, pipeline, plan mode |
 | `test_agents.py` | Each agent with mocked LLM: registration, tools, handle method |
 | `test_storage.py` | Create, list, download, update, delete records; checksum rejection; quota enforcement |
 | `test_backup.py` | Upload, download, history, delete; tier-based storage limits |
 | `test_plugins.py` | List, install, uninstall, revenue events, tier gate enforcement |
 | `test_agent_registry.py` | Registry singleton, registration, lookup, listing |
 | `test_execution_plan.py` | Plan builder, template registry, plan cache |
 | `test_middleware.py` | Rate limiting by tier, sanitizer prompt leak detection |
 ---
 ## Project Structure
 ```
 adiuva-api/
 ├── alembic.ini                  # Alembic configuration
 ├── BACKEND_PLAN.md              # Architecture & design decisions
 ├── docker-compose.yml           # Docker Compose (app + PostgreSQL)
 ├── Dockerfile                   # Multi-stage production build
 ├── requirements.txt             # Python dependencies
 │
 ├── alembic/                     # Database migrations
 │   ├── env.py                   # Alembic environment config
 │   ├── script.py.mako           # Migration template
 │   └── versions/
 │       ├── 001_initial_schema.py    # Tables, indexes, FKs
 │       └── 002_seed_plugins.py      # Seed marketplace plugins
 │
 ├── app/                         # Application source
 │   ├── main.py                  # FastAPI app factory, middleware, routes
 │   ├── db.py                    # Async SQLAlchemy engine & session
 │   ├── models.py                # SQLAlchemy ORM models (9 tables)
 │   ├── schemas.py               # Pydantic request/response schemas
 │   │
 │   ├── config/
 │   │   └── settings.py          # Pydantic Settings (env vars)
 │   │
 │   ├── agents/                  # LLM-powered domain agents
 │   │   ├── task_agent.py        # Task & comment CRUD (8 tools)
 │   │   ├── project_agent.py     # Project lifecycle (6 tools)
 │   │   ├── timeline_agent.py    # Milestones (4 tools)
 │   │   └── note_agent.py        # Markdown notes (5 tools)
 │   │
 │   ├── core/                    # Orchestration engine
 │   │   ├── agent_registry.py    # BaseAgent, ChatAgent, AgentRegistry
 │   │   ├── llm.py               # LiteLLM factory (get_llm)
 │   │   ├── orchestrator.py      # Intent classification & routing
 │   │   └── execution_plan.py    # Plan builder, templates, cache
 │   │
 │   ├── api/                     # HTTP layer
 │   │   ├── deps.py              # Shared FastAPI dependencies
 │   │   ├── middleware/
 │   │   │   ├── auth.py          # JWT validation, live tier lookup
 │   │   │   ├── rate_limit.py    # Sliding-window tier rate limiter
 │   │   │   └── sanitizer.py     # Prompt IP leak protection
 │   │   └── routes/
 │   │       ├── auth.py          # Register, login, refresh, me
 │   │       ├── chat.py          # Chat + WebSocket streaming
 │   │       ├── plans.py         # Execution plan playbooks
 │   │       ├── storage.py       # E2E encrypted record CRUD
 │   │       ├── vectors.py       # Vector upsert, search, delete
 │   │       ├── backup.py        # Encrypted backup management
 │   │       ├── plugins.py       # Marketplace browse & install
 │   │       └── billing.py       # Stripe checkout & webhooks
 │   │
 │   ├── storage/                 # Storage backends
 │   │   ├── blob_store.py        # S3 blob storage
 │   │   ├── vector_store.py      # Pinecone / Qdrant vector store
 │   │   └── encryption.py        # Checksum verification utilities
 │   │
 │   ├── billing/                 # Subscription management
 │   │   ├── stripe_service.py    # Stripe API integration
 │   │   └── tier_manager.py      # Feature matrix & quota enforcement
 │   │
 │   └── marketplace/             # Plugin ecosystem
 │       ├── plugin_registry.py   # Catalog CRUD & search
 │       ├── plugin_review.py     # Security checklist & review queue
 │       └── revenue_share.py     # 70/30 split & Stripe Connect
 │
 └── tests/                       # Test suite
    ├── conftest.py              # Fixtures: DB, S3, auth, seeds
    ├── test_auth.py
    ├── test_orchestrator.py
    ├── test_agents.py
    ├── test_storage.py
    ├── test_backup.py
    ├── test_plugins.py
    ├── test_agent_registry.py
    ├── test_execution_plan.py
    └── test_middleware.py
 ```
 ---
 ## License
 *To be determined.*
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -16,7 +16,7 @@ import re
 from logging.config import fileConfig
 from alembic import context
-from sqlalchemy import engine_from_config, pool
+from sqlalchemy import pool
 from sqlalchemy.ext.asyncio import create_async_engine
 # Alembic Config object (gives access to alembic.ini values).
--- a/alembic/versions/001_initial_schema.py
+++ b/alembic/versions/001_initial_schema.py
@@ -1,5 +1,4 @@
-"""Initial schema: users, refresh_tokens, subscriptions, storage_records,
+"""Initial schema: users, refresh_tokens, subscriptions.
 backup_metadata, plugins, plugin_installations, plugin_reviews, revenue_events.
 Revision ID: 001
 Revises:
@@ -28,18 +27,6 @@ def upgrade() -> None:
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    op.execute("""
        DO $$ BEGIN
            CREATE TYPE plugin_status AS ENUM ('pending_review', 'approved', 'rejected');
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    op.execute("""
        DO $$ BEGIN
            CREATE TYPE review_decision AS ENUM ('approved', 'rejected');
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    # ── users ─────────────────────────────────────────────────────────────
    op.create_table(
@@ -88,122 +75,10 @@ def upgrade() -> None:
    op.create_index("ix_subscriptions_user_id", "subscriptions", ["user_id"])
    op.create_index("ix_subscriptions_stripe_id", "subscriptions", ["stripe_subscription_id"])
    # ── storage_records ───────────────────────────────────────────────────
    op.create_table(
        "storage_records",
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("table_name", sa.String(100), nullable=False),
        sa.Column("s3_key", sa.String(500), nullable=False),
        sa.Column("checksum", sa.String(64), nullable=False),
        sa.Column("size_bytes", sa.Integer, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
    )
    op.create_index("ix_storage_records_user_id", "storage_records", ["user_id"])
    # ── backup_metadata ───────────────────────────────────────────────────
    op.create_table(
        "backup_metadata",
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("s3_key", sa.String(500), nullable=False),
        sa.Column("version", sa.Integer, nullable=False),
        sa.Column("timestamp", sa.BigInteger, nullable=False),
        sa.Column("checksum", sa.String(64), nullable=False),
        sa.Column("size_bytes", sa.Integer, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
    )
    op.create_index("ix_backup_metadata_user_id", "backup_metadata", ["user_id"])
    # ── plugins ───────────────────────────────────────────────────────────
    op.create_table(
        "plugins",
        sa.Column("id", sa.String(255), nullable=False),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("description", sa.Text, nullable=False, server_default=""),
        sa.Column("version", sa.String(50), nullable=False, server_default="1.0.0"),
        sa.Column("author_id", postgresql.UUID(as_uuid=False), nullable=True),
        sa.Column("author_name", sa.String(255), nullable=False, server_default=""),
        sa.Column("category", sa.String(100), nullable=False, server_default=""),
        sa.Column("price_cents", sa.Integer, nullable=False, server_default="0"),
        sa.Column("permissions", sa.Text, nullable=False, server_default="[]"),
        sa.Column("status", postgresql.ENUM("pending_review", "approved", "rejected", name="plugin_status", create_type=False), nullable=False, server_default="pending_review"),
        sa.Column("s3_package_key", sa.String(500), nullable=True),
        sa.Column("install_count", sa.Integer, nullable=False, server_default="0"),
        sa.Column("avg_rating", sa.Float, nullable=False, server_default="0.0"),
        sa.Column("rejection_reason", sa.Text, nullable=True),
        sa.Column("submitted_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["author_id"], ["users.id"], ondelete="SET NULL"),
    )
    # ── plugin_installations ──────────────────────────────────────────────
    op.create_table(
        "plugin_installations",
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("plugin_id", sa.String(255), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("installed_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["plugin_id"], ["plugins.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.UniqueConstraint("plugin_id", "user_id", name="uq_plugin_user"),
    )
    op.create_index("ix_plugin_installations_plugin_id", "plugin_installations", ["plugin_id"])
    op.create_index("ix_plugin_installations_user_id", "plugin_installations", ["user_id"])
    # ── plugin_reviews ────────────────────────────────────────────────────
    op.create_table(
        "plugin_reviews",
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("plugin_id", sa.String(255), nullable=False),
        sa.Column("reviewer_id", postgresql.UUID(as_uuid=False), nullable=True),
        sa.Column("decision", postgresql.ENUM("approved", "rejected", name="review_decision", create_type=False), nullable=False),
        sa.Column("notes", sa.Text, nullable=True),
        sa.Column("reviewed_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["plugin_id"], ["plugins.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["reviewer_id"], ["users.id"], ondelete="SET NULL"),
    )
    op.create_index("ix_plugin_reviews_plugin_id", "plugin_reviews", ["plugin_id"])
    # ── revenue_events ────────────────────────────────────────────────────
    op.create_table(
        "revenue_events",
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("plugin_id", sa.String(255), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("amount_cents", sa.Integer, nullable=False, server_default="0"),
        sa.Column("developer_share_cents", sa.Integer, nullable=False, server_default="0"),
        sa.Column("stripe_transfer_id", sa.String(255), nullable=True),
        sa.Column("paid_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["plugin_id"], ["plugins.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
    )
    op.create_index("ix_revenue_events_plugin_id", "revenue_events", ["plugin_id"])
    op.create_index("ix_revenue_events_user_id", "revenue_events", ["user_id"])
 def downgrade() -> None:
    op.drop_table("revenue_events")
    op.drop_table("plugin_reviews")
    op.drop_table("plugin_installations")
    op.drop_table("plugins")
    op.drop_table("backup_metadata")
    op.drop_table("storage_records")
    op.drop_table("subscriptions")
    op.drop_table("refresh_tokens")
    op.drop_table("users")
    op.execute("DROP TYPE IF EXISTS review_decision")
    op.execute("DROP TYPE IF EXISTS plugin_status")
    op.execute("DROP TYPE IF EXISTS billing_tier")
--- a/alembic/versions/002_seed_plugins.py
+++ b/alembic/versions/002_seed_plugins.py
@@ -1,92 +0,0 @@
 """Seed approved plugins: GitHub Sync, Slack Notifier, Time Tracker.
 Revision ID: 002
 Revises: 001
 Create Date: 2026-03-03
 """
 from __future__ import annotations
 import json
 from datetime import datetime, timezone
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 revision: str = "002"
 down_revision: Union[str, None] = "001"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 _SEED_PLUGINS = [
    {
        "id": "plugin-github-sync",
        "name": "GitHub Sync",
        "description": "Sync tasks with GitHub Issues and pull requests.",
        "version": "1.0.0",
        "author_name": "Adiuva",
        "category": "productivity",
        "price_cents": 0,
        "permissions": json.dumps(["read:tasks", "write:tasks"]),
        "status": "approved",
        "s3_package_key": "plugins/plugin-github-sync/1.0.0/package.zip",
        "install_count": 0,
        "avg_rating": 0.0,
    },
    {
        "id": "plugin-slack-notify",
        "name": "Slack Notifier",
        "description": "Post task and timeline updates to Slack channels.",
        "version": "1.2.0",
        "author_name": "Adiuva",
        "category": "communication",
        "price_cents": 499,
        "permissions": json.dumps(["read:tasks", "read:timelines"]),
        "status": "approved",
        "s3_package_key": "plugins/plugin-slack-notify/1.2.0/package.zip",
        "install_count": 0,
        "avg_rating": 0.0,
    },
    {
        "id": "plugin-time-tracker",
        "name": "Time Tracker",
        "description": "Track time spent on tasks with automatic reporting.",
        "version": "0.9.1",
        "author_name": "Third Party",
        "category": "productivity",
        "price_cents": 999,
        "permissions": json.dumps(["read:tasks", "write:tasks"]),
        "status": "approved",
        "s3_package_key": "plugins/plugin-time-tracker/0.9.1/package.zip",
        "install_count": 0,
        "avg_rating": 0.0,
    },
 ]
 def upgrade() -> None:
    plugins = sa.table(
        "plugins",
        sa.column("id", sa.String),
        sa.column("name", sa.String),
        sa.column("description", sa.Text),
        sa.column("version", sa.String),
        sa.column("author_name", sa.String),
        sa.column("category", sa.String),
        sa.column("price_cents", sa.Integer),
        sa.column("permissions", sa.Text),
        sa.column("status", sa.Enum("pending_review", "approved", "rejected", name="plugin_status")),
        sa.column("s3_package_key", sa.String),
        sa.column("install_count", sa.Integer),
        sa.column("avg_rating", sa.Float),
    )
    op.bulk_insert(plugins, _SEED_PLUGINS)
 def downgrade() -> None:
    op.execute(
        "DELETE FROM plugins WHERE id IN ("
        "'plugin-github-sync', 'plugin-slack-notify', 'plugin-time-tracker'"
        ")"
    )
--- a/alembic/versions/003_agent_tables.py
+++ b/alembic/versions/003_agent_tables.py
@@ -14,7 +14,7 @@ from alembic import op
 from sqlalchemy.dialects import postgresql
 revision: str = "003"
-down_revision: Union[str, None] = "002"
+down_revision: Union[str, None] = "001"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
--- a/alembic/versions/005_associative_pgvector.py
+++ b/alembic/versions/005_associative_pgvector.py
@@ -0,0 +1,54 @@
 """Phase 1 — confirm pgvector activation on memory_associative.
 Migration 004 created the embedding column as vector(1536) and added the
 IVFFlat index.  This migration is the Phase-1 checkpoint:
  1. Ensures the pgvector extension is enabled (idempotent).
  2. Ensures the canonical Phase-1 IVFFlat index exists under the name
     memory_associative_embedding_idx (creates it only if absent).
 Revision ID: 005
 Revises: e04100e88ace
 Create Date: 2026-04-15
 """
 from __future__ import annotations
 from typing import Sequence, Union
 from alembic import op
 revision: str = "005"
 down_revision: Union[str, None] = "e04100e88ace"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Enable pgvector and make sure the Phase-1 IVFFlat index exists.

    Both statements are guarded, so running this against a database that
    already has the extension and the index changes nothing.
    """
    # Ensure pgvector extension is enabled (also done in 004, idempotent).
    op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    # Ensure the canonical Phase-1 IVFFlat index exists.
    # 004 may have created ix_memory_associative_embedding; this adds the
    # Phase-1 name memory_associative_embedding_idx if it is missing.
    # NOTE(review): the pg_indexes lookup matches on table name and index
    # name only (no schemaname filter) — confirm that is sufficient for the
    # schemas this database uses.
    op.execute(
        """
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1
                FROM   pg_indexes
                WHERE  tablename  = 'memory_associative'
                  AND  indexname  = 'memory_associative_embedding_idx'
            ) THEN
                CREATE INDEX memory_associative_embedding_idx
                ON memory_associative
                USING ivfflat (embedding vector_cosine_ops)
                WITH  (lists = 100);
            END IF;
        END $$;
        """
    )
 def downgrade() -> None:
    """Drop the Phase-1 index if it exists.

    Only the index is removed; the ``vector`` extension enabled by
    ``upgrade`` is left installed.
    """
    op.execute("DROP INDEX IF EXISTS memory_associative_embedding_idx;")
--- a/alembic/versions/006_memory_relations.py
+++ b/alembic/versions/006_memory_relations.py
@@ -0,0 +1,74 @@
 """Add memory_relations table (Phase 3 — relational tier).
 Revision ID: 006
 Revises: 1f5975a4f3f4
 Create Date: 2026-04-16
 """
 from __future__ import annotations
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 from sqlalchemy.dialects import postgresql
 revision: str = "006"
 down_revision: Union[str, None] = "1f5975a4f3f4"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the ``memory_relations`` table and its two lookup indexes.

    Each row stores one subject / predicate / object triple owned by a user,
    with a confidence score and an optional link to the episodic memory row
    it was derived from.
    """
    op.create_table(
        "memory_relations",
        # No server-side default is declared for the primary key, so the
        # inserting code has to supply the id.
        sa.Column("id", postgresql.UUID(as_uuid=False), primary_key=True),
        # Owner of the relation; rows are removed when the user is deleted.
        sa.Column(
            "user_id",
            postgresql.UUID(as_uuid=False),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        # The triple itself: (subject_label, subject_type) --predicate-->
        # (object_label, object_type).
        sa.Column("subject_label", sa.String(128), nullable=False),
        sa.Column("subject_type", sa.String(32), nullable=False),
        sa.Column("predicate", sa.String(64), nullable=False),
        sa.Column("object_label", sa.String(128), nullable=False),
        sa.Column("object_type", sa.String(32), nullable=False),
        # Rows inserted without an explicit confidence get 0.7.
        sa.Column("confidence", sa.Float, nullable=False, server_default="0.7"),
        # Optional provenance link; deleting the episode keeps the relation
        # and nulls this column.
        sa.Column(
            "source_episode_id",
            postgresql.UUID(as_uuid=False),
            sa.ForeignKey("memory_episodic.id", ondelete="SET NULL"),
            nullable=True,
        ),
        # Opaque binary payload; presumably ciphertext given the column
        # name — the database does not enforce that.
        sa.Column("notes_encrypted", sa.LargeBinary, nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        # NOTE(review): updated_at only has an insert-time default here; no
        # trigger or on-update hook is defined in this migration, so later
        # changes must set it explicitly — confirm the ORM model does.
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column("last_confirmed_at", sa.DateTime(timezone=True), nullable=True),
    )
    # Composite indexes for per-user lookups by subject and by predicate.
    op.create_index(
        "memory_relations_user_subject_idx",
        "memory_relations",
        ["user_id", "subject_label"],
    )
    op.create_index(
        "memory_relations_user_predicate_idx",
        "memory_relations",
        ["user_id", "predicate"],
    )
 def downgrade() -> None:
    """Remove ``memory_relations``: drop both indexes, then the table."""
    # Indexes are dropped in the reverse of the order upgrade() created them.
    op.drop_index("memory_relations_user_predicate_idx", "memory_relations")
    op.drop_index("memory_relations_user_subject_idx", "memory_relations")
    op.drop_table("memory_relations")
--- a/alembic/versions/007_rename_agents_to_scouts.py
+++ b/alembic/versions/007_rename_agents_to_scouts.py
@@ -0,0 +1,41 @@
 """Rename agents to scouts.
 Revision ID: 007
 Revises: d6e3f4a5b6c7
 Create Date: 2026-05-15
 Renames all identifiers of the agents subsystem (tables and columns) to scouts.
 Pre-1.0 — no data preservation concerns beyond ALTER TABLE rename.
 """
 from typing import Sequence, Union
 from alembic import op
 revision: str = "007"
 down_revision: Union[str, None] = "d6e3f4a5b6c7"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Rename the three agent tables and their agent-named columns to scout.

    Tables are renamed first, so the column renames below address the
    tables by their new ``*scout*`` names.
    """
    # Tables
    op.rename_table("local_agent_configs", "local_scout_configs")
    op.rename_table("cloud_agent_configs", "cloud_scout_configs")
    op.rename_table("agent_run_logs", "scout_run_logs")
    # Columns
    # (no column of cloud_scout_configs is renamed by this migration)
    op.alter_column("local_scout_configs", "agent_config", new_column_name="scout_config")
    op.alter_column("scout_run_logs", "agent_id", new_column_name="scout_id")
    op.alter_column("scout_run_logs", "agent_type", new_column_name="scout_type")
 def downgrade() -> None:
    """Undo the scout rename, restoring the original agent identifiers.

    Steps run in the exact reverse of ``upgrade``: columns are renamed back
    while the tables still carry their scout names, then the tables are
    renamed back.
    """
    op.alter_column("scout_run_logs", "scout_type", new_column_name="agent_type")
    op.alter_column("scout_run_logs", "scout_id", new_column_name="agent_id")
    op.alter_column("local_scout_configs", "scout_config", new_column_name="agent_config")
    op.rename_table("scout_run_logs", "agent_run_logs")
    op.rename_table("cloud_scout_configs", "cloud_agent_configs")
    op.rename_table("local_scout_configs", "local_agent_configs")
--- a/alembic/versions/008_scout_triage_queue.py
+++ b/alembic/versions/008_scout_triage_queue.py
@@ -0,0 +1,59 @@
 """Scout triage queue + cloud_scout_configs alterations.
 Revision ID: 008
 Revises: 007
 Create Date: 2026-05-16
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 revision: str = "008"
 down_revision: Union[str, None] = "007"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create ``scout_triage_queue`` and add four columns to ``cloud_scout_configs``."""
    op.create_table(
        "scout_triage_queue",
        sa.Column("id", sa.Uuid(as_uuid=False), primary_key=True),
        sa.Column(
            "user_id",
            sa.Uuid(as_uuid=False),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        sa.Column(
            "scout_id",
            sa.Uuid(as_uuid=False),
            sa.ForeignKey("cloud_scout_configs.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("source_type", sa.String(50), nullable=False),
        sa.Column("source_msg_ref", sa.String(255), nullable=False),
        sa.Column("triage_verdict", sa.String(20), nullable=False),
        sa.Column("triage_reason", sa.Text, nullable=True),
        sa.Column("status", sa.String(20), nullable=False, server_default="queued"),
        sa.Column(
            "triaged_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column("delivered_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("acked_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
        # At most one queue row per (scout, source message reference).
        sa.UniqueConstraint("scout_id", "source_msg_ref", name="uq_scout_triage_queue_scout_msg"),
    )
    op.create_index(
        "ix_scout_triage_queue_user_status",
        "scout_triage_queue",
        ["user_id", "status"],
    )
    # Partial index: only rows whose status is not 'acked' are indexed by expiry.
    op.create_index(
        "ix_scout_triage_queue_expires_active",
        "scout_triage_queue",
        ["expires_at"],
        postgresql_where=sa.text("status != 'acked'"),
    )

    new_config_columns = (
        sa.Column("auto_trash_spam", sa.Boolean(), nullable=False, server_default=sa.text("false")),
        sa.Column("gmail_history_id", sa.String(64), nullable=True),
        sa.Column("gmail_watch_expires_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("device_inactivity_pause_days", sa.Integer(), nullable=False, server_default="14"),
    )
    for column in new_config_columns:
        op.add_column("cloud_scout_configs", column)
 def downgrade() -> None:
    """Remove the added ``cloud_scout_configs`` columns and the triage queue."""
    for column_name in (
        "device_inactivity_pause_days",
        "gmail_watch_expires_at",
        "gmail_history_id",
        "auto_trash_spam",
    ):
        op.drop_column("cloud_scout_configs", column_name)

    # Explicitly created indexes are dropped before the table.
    for index_name in (
        "ix_scout_triage_queue_expires_active",
        "ix_scout_triage_queue_user_status",
    ):
        op.drop_index(index_name, table_name="scout_triage_queue")
    op.drop_table("scout_triage_queue")
--- a/alembic/versions/1f5975a4f3f4_add_extraction_queue.py
+++ b/alembic/versions/1f5975a4f3f4_add_extraction_queue.py
@@ -0,0 +1,38 @@
 """add extraction_queue
 Revision ID: 1f5975a4f3f4
 Revises: 005
 Create Date: 2026-04-16 17:26:25.790870
 """
 from __future__ import annotations
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision: str = '1f5975a4f3f4'
 down_revision: Union[str, None] = '005'
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the ``extraction_queue`` table and an index on its ``user_id``."""
    queue_columns = [
        sa.Column("id", sa.Uuid(as_uuid=False), nullable=False),
        sa.Column("user_id", sa.Uuid(as_uuid=False), nullable=False),
        sa.Column("episode_id", sa.Uuid(as_uuid=False), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
    ]
    op.create_table(
        "extraction_queue",
        *queue_columns,
        # Queue rows are removed together with their owning user.
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        op.f("ix_extraction_queue_user_id"),
        "extraction_queue",
        ["user_id"],
        unique=False,
    )
 def downgrade() -> None:
    """Drop the ``user_id`` index and then the ``extraction_queue`` table."""
    index_name = op.f("ix_extraction_queue_user_id")
    op.drop_index(index_name, table_name="extraction_queue")
    op.drop_table("extraction_queue")
--- a/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py
+++ b/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py
@@ -0,0 +1,107 @@
 """Restore agent config tables and add agent_config column.
 9a1f2d0b6c7e dropped local_agent_configs and cloud_agent_configs, but both
 ORM models are still active. This migration recreates them with agent_config
 added to local_agent_configs.
 Revision ID: a3b9c0d1e2f3
 Revises: 9a1f2d0b6c7e
 Create Date: 2026-04-07 00:00:00.000000
 """
 from __future__ import annotations
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision: str = "a3b9c0d1e2f3"
 down_revision: Union[str, None] = "9a1f2d0b6c7e"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Restore the agent config tables and make sure ``agent_config`` exists.

    Each table is created only when it is missing from the database. When
    ``local_agent_configs`` is already present, the ``agent_config`` column is
    added if absent, so the column exists whichever path was taken.
    """
    # Recreate enum types (idempotent — they may already exist from migration 003)
    op.execute("""
        DO $$ BEGIN
            CREATE TYPE agent_type AS ENUM ('local', 'cloud');
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    op.execute("""
        DO $$ BEGIN
            CREATE TYPE agent_run_status AS ENUM ('running', 'success', 'error', 'partial');
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    op.execute("""
        DO $$ BEGIN
            CREATE TYPE cloud_provider AS ENUM ('gmail', 'teams', 'outlook');
        EXCEPTION WHEN duplicate_object THEN NULL;
        END $$;
    """)
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    existing = set(inspector.get_table_names())
    # ── local_agent_configs (with agent_config column) ────────────────────
    if "local_agent_configs" not in existing:
        op.create_table(
            "local_agent_configs",
            sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
            sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
            sa.Column("device_id", sa.String(255), nullable=False),
            sa.Column("name", sa.String(255), nullable=False),
            sa.Column("directory_paths", sa.JSON, nullable=False, server_default="[]"),
            sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"),
            sa.Column("prompt_template", sa.Text, nullable=False, server_default=""),
            sa.Column("agent_config", sa.JSON, nullable=True),
            sa.Column("file_extensions", sa.JSON, nullable=False, server_default="[]"),
            sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"),
            sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()),
            sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True),
            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
            sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
            sa.PrimaryKeyConstraint("id"),
            sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        )
        op.create_index("ix_local_agent_configs_user_id", "local_agent_configs", ["user_id"])
    else:
        # The table was not created here, so it may lack the column this
        # migration is meant to introduce; add it only when it is missing.
        local_columns = {col["name"] for col in inspector.get_columns("local_agent_configs")}
        if "agent_config" not in local_columns:
            op.add_column("local_agent_configs", sa.Column("agent_config", sa.JSON, nullable=True))
    # ── cloud_agent_configs ───────────────────────────────────────────────
    if "cloud_agent_configs" not in existing:
        op.create_table(
            "cloud_agent_configs",
            sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
            sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
            sa.Column(
                "provider",
                # create_type=False: the enum type is created by the DO block above.
                postgresql.ENUM("gmail", "teams", "outlook", name="cloud_provider", create_type=False),
                nullable=False,
            ),
            sa.Column("name", sa.String(255), nullable=False),
            sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"),
            sa.Column("prompt_template", sa.Text, nullable=False, server_default=""),
            sa.Column("oauth_token_encrypted", sa.Text, nullable=True),
            sa.Column("filter_config", sa.JSON, nullable=True),
            sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"),
            sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()),
            sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True),
            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
            sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
            sa.PrimaryKeyConstraint("id"),
            sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        )
        op.create_index("ix_cloud_agent_configs_user_id", "cloud_agent_configs", ["user_id"])
 def downgrade() -> None:
    """Drop both agent config tables, each preceded by its ``user_id`` index."""
    targets = (
        ("ix_cloud_agent_configs_user_id", "cloud_agent_configs"),
        ("ix_local_agent_configs_user_id", "local_agent_configs"),
    )
    for index_name, table in targets:
        op.drop_index(index_name, table_name=table)
        op.drop_table(table)
--- a/alembic/versions/b4c0d1e2f3a4_add_oauth_and_avatar.py
+++ b/alembic/versions/b4c0d1e2f3a4_add_oauth_and_avatar.py
@@ -0,0 +1,56 @@
 """Add oauth_accounts table, nullable password_hash, avatar_url to users.
 Revision ID: b4c0d1e2f3a4
 Revises: a3b9c0d1e2f3
 Create Date: 2026-04-10 00:00:00.000000
 """
 from __future__ import annotations
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision: str = "b4c0d1e2f3a4"
 down_revision: Union[str, None] = "a3b9c0d1e2f3"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Relax ``users.password_hash``, add ``users.avatar_url``, create ``oauth_accounts``."""
    # Social users have no password, so the hash column becomes nullable.
    op.alter_column(
        "users",
        "password_hash",
        existing_type=sa.String(255),
        nullable=True,
    )
    avatar_column = sa.Column("avatar_url", sa.String(2048), nullable=True)
    op.add_column("users", avatar_column)

    oauth_columns = [
        sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
        sa.Column("provider", sa.String(50), nullable=False),
        sa.Column("provider_user_id", sa.String(255), nullable=False),
        sa.Column("provider_email", sa.String(255), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
    ]
    op.create_table(
        "oauth_accounts",
        *oauth_columns,
        sa.PrimaryKeyConstraint("id"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        # A given provider account can be linked only once.
        sa.UniqueConstraint("provider", "provider_user_id", name="uq_oauth_provider_user"),
    )
    op.create_index("ix_oauth_accounts_user_id", "oauth_accounts", ["user_id"])
 def downgrade() -> None:
    """Drop ``oauth_accounts`` and ``users.avatar_url``; make ``password_hash`` required again."""
    op.drop_index("ix_oauth_accounts_user_id", table_name="oauth_accounts")
    op.drop_table("oauth_accounts")
    op.drop_column("users", "avatar_url")
    # NOTE(review): restoring NOT NULL presumably fails if any user row still
    # has a NULL password_hash — confirm before running this downgrade.
    op.alter_column(
        "users",
        "password_hash",
        existing_type=sa.String(255),
        nullable=False,
    )
--- a/alembic/versions/c5d1e2f3a4b5_add_onboarding_completed_at.py
+++ b/alembic/versions/c5d1e2f3a4b5_add_onboarding_completed_at.py
@@ -0,0 +1,31 @@
 """Add onboarding_completed_at column to users table.
 Revision ID: c5d1e2f3a4b5
 Revises: b4c0d1e2f3a4
 Create Date: 2026-04-11 00:00:00.000000
 """
 from __future__ import annotations
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "c5d1e2f3a4b5"
 down_revision: Union[str, None] = "b4c0d1e2f3a4"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add the nullable, timezone-aware ``users.onboarding_completed_at`` column."""
    completed_at = sa.Column("onboarding_completed_at", sa.DateTime(timezone=True), nullable=True)
    op.add_column("users", completed_at)
 def downgrade() -> None:
    """Remove ``users.onboarding_completed_at``."""
    op.drop_column(table_name="users", column_name="onboarding_completed_at")
--- a/alembic/versions/d6e3f4a5b6c7_folder_index_tables.py
+++ b/alembic/versions/d6e3f4a5b6c7_folder_index_tables.py
@@ -0,0 +1,46 @@
 """Add token tracking columns for folder integration.
 Revision ID: d6e3f4a5b6c7
 Revises: 006
 Create Date: 2026-05-11 00:00:00.000000
 """
 from __future__ import annotations
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 from sqlalchemy.dialects.postgresql import UUID
 # revision identifiers, used by Alembic.
 revision: str = "d6e3f4a5b6c7"
 down_revision: Union[str, None] = "006"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add ``agent_run_logs.tokens_used`` and create ``monthly_token_usage``."""
    run_tokens = sa.Column("tokens_used", sa.Integer(), nullable=False, server_default="0")
    op.add_column("agent_run_logs", run_tokens)

    op.create_table(
        "monthly_token_usage",
        sa.Column(
            "user_id",
            UUID(as_uuid=False),
            sa.ForeignKey("users.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("year_month", sa.String(7), nullable=False),
        sa.Column("feature", sa.String(64), nullable=False),
        sa.Column("tokens_used", sa.Integer(), nullable=False, server_default="0"),
        # One usage row per user, month and feature.
        sa.PrimaryKeyConstraint("user_id", "year_month", "feature"),
    )
    op.create_index("ix_monthly_token_usage_user_month", "monthly_token_usage", ["user_id", "year_month"])
 def downgrade() -> None:
    """Drop ``monthly_token_usage`` (index first) and ``agent_run_logs.tokens_used``."""
    usage_table = "monthly_token_usage"
    op.drop_index("ix_monthly_token_usage_user_month", table_name=usage_table)
    op.drop_table(usage_table)
    op.drop_column("agent_run_logs", "tokens_used")
--- a/alembic/versions/e04100e88ace_avatar_url_varchar_to_text.py
+++ b/alembic/versions/e04100e88ace_avatar_url_varchar_to_text.py
@@ -0,0 +1,34 @@
 """avatar_url_varchar_to_text
 Revision ID: e04100e88ace
 Revises: c5d1e2f3a4b5
 Create Date: 2026-04-13 09:13:06.733674
 """
 from __future__ import annotations
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision: str = 'e04100e88ace'
 down_revision: Union[str, None] = 'c5d1e2f3a4b5'
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Change ``users.avatar_url`` from ``VARCHAR(2048)`` to ``TEXT``."""
    op.alter_column(
        "users",
        "avatar_url",
        existing_type=sa.VARCHAR(length=2048),
        type_=sa.Text(),
        existing_nullable=True,
    )
 def downgrade() -> None:
    """Change ``users.avatar_url`` back from ``TEXT`` to ``VARCHAR(2048)``."""
    op.alter_column(
        "users",
        "avatar_url",
        existing_type=sa.Text(),
        type_=sa.VARCHAR(length=2048),
        existing_nullable=True,
    )
--- a/services/auth/app/init.py
+++ b/services/auth/app/init.py
--- a/app/agents/init.py
+++ b/app/agents/init.py
@@ -0,0 +1,5 @@
 """Expose tool modules used by deep orchestrator-worker graphs."""
 from app.agents import filesystem_agent, timeline_agent, note_agent, project_agent, task_agent
 __all__ = ["filesystem_agent", "timeline_agent", "note_agent", "project_agent", "task_agent"]
--- a/app/agents/client_agent.py
+++ b/app/agents/client_agent.py
@@ -0,0 +1,52 @@
 """Client agent — read-only tools for the clients table."""
 from __future__ import annotations
 import json
 from typing import Any
 from langchain_core.tools import tool
 from app.core.ws_context import execute_on_client
@tool
 async def list_clients(search: str = "", limit: int = 20) -> str:
    """List clients, optionally filtered by a name/email substring search.
    search: optional substring to match against client name or email.
    limit: max rows to return (default 20).
    """
    filters: dict[str, Any] = {"limit": limit}
    if search:
        filters["search"] = search
    result = await execute_on_client(action="select", table="clients", filters=filters)
    rows = result.get("rows", [])
    if not rows:
        return "No clients found."
    lines = [
        f"- {r.get('name', '?')} (id: {r.get('id')}, email: {r.get('email', '')}, "
        f"company: {r.get('company', '')})"
        for r in rows
    ]
    return f"Found {len(rows)} client(s):\n" + "\n".join(lines)
@tool
 async def get_client(id: str) -> str:
    """Get full details for one client by UUID.
    id: the client's UUID.
    """
    if not id:
        return "Client id is required."
    result = await execute_on_client(action="get", table="clients", data={"id": id})
    row = result.get("row") or result.get("rows", [None])[0] if result else None
    if not row:
        return f"Client '{id}' not found."
    return f"Client details:\n{json.dumps(row, ensure_ascii=False, indent=2)}"
 # The client tools defined in this module, collected in one list.
 CLIENT_TOOLS: list[Any] = [list_clients, get_client]
--- a/app/agents/filesystem_agent.py
+++ b/app/agents/filesystem_agent.py
@@ -0,0 +1,194 @@
 """Filesystem agent — tools for reading local directories and files on Electron.
 These tools delegate to the Electron client via ``execute_on_client()`` using
 the same WS tool-call round-trip pattern as CRUD tools.  The Electron app
 handles actual disk I/O and responds with ``tool_result`` frames.
 """
 from __future__ import annotations
 import os
 import re
 from pathlib import Path
 from typing import Any
 from langchain_core.tools import tool
 from app.core.ws_context import execute_on_client
 # Max characters returned by read_file_content in journey (exploration) tools.
 # The journey only needs to understand file structure, not full content.
 _JOURNEY_READ_MAX_CHARS: int = 4000
 def _resolve_path(path: str, base: str) -> str:
    """Resolve *path* against *base* when *path* is relative.
    The LLM often passes ``"."`` meaning "the configured directory".
    Without this, Electron resolves ``"."`` relative to its own CWD instead
    of the user's chosen directory.
    """
    if os.path.isabs(path):
        return path
    return str(Path(base) / path)
@tool
 async def list_directory(path: str) -> str:
    """List files and folders in a local directory on the user's device.
    Returns a formatted listing of entries with name, type (file/directory),
    and full path.
    """
    result = await execute_on_client(
        action="list_directory",
        data={"path": path},
    )
    entries: list[dict[str, Any]] = result.get("entries", [])
    if not entries:
        return f"Directory '{path}' is empty or does not exist."
    lines: list[str] = []
    for entry in entries:
        entry_type = entry.get("type", "unknown")
        entry_name = entry.get("name", "")
        entry_path = entry.get("path", "")
        lines.append(f"- [{entry_type}] {entry_name}  ({entry_path})")
    return f"Directory listing for '{path}' ({len(entries)} entries):\n" + "\n".join(lines)
@tool
 async def read_file_content(path: str) -> str:
    """Read the text content of a local file on the user's device.
    Returns the file content as a string.  Large files may be truncated
    by the Electron client.
    """
    result = await execute_on_client(
        action="read_file_content",
        data={"path": path},
    )
    content: str = result.get("content", "")
    if not content:
        return f"File '{path}' is empty or could not be read."
    return content
@tool
 async def get_file_metadata(path: str) -> str:
    """Get metadata for a local file: size, creation date, modification date, extension.
    Returns a formatted summary of the file's metadata.
    """
    result = await execute_on_client(
        action="get_file_metadata",
        data={"path": path},
    )
    size = result.get("size", "unknown")
    created = result.get("createdAt", "unknown")
    modified = result.get("modifiedAt", "unknown")
    extension = result.get("extension", "unknown")
    name = result.get("name", path)
    return (
        f"File: {name}\n"
        f"  Extension: {extension}\n"
        f"  Size: {size} bytes\n"
        f"  Created: {created}\n"
        f"  Modified: {modified}"
    )
 # The module-level filesystem tools; these pass the given path to the client
 # as-is, without resolving it against a base directory.
 FILESYSTEM_TOOLS: list[Any] = [
    list_directory,
    read_file_content,
    get_file_metadata,
 ]
 def make_directory_tools(base_directory: str) -> list[Any]:
    """Build filesystem tools whose relative paths are anchored at *base_directory*.

    Prefer these over ``FILESYSTEM_TOOLS`` when the user's target directory is
    known upfront (e.g., journey setup sessions). Every tool passes its
    ``path`` argument through ``_resolve_path`` before calling the Electron
    client, so a relative path such as ``"."`` from the LLM is sent as a path
    under *base_directory*. ``read_file_content`` additionally compacts the
    content it returns.
    """
    def _compact_for_journey(raw: str) -> str:
        """Strip style/script blocks and HTML comments, then cap the length.

        The result is truncated to ``_JOURNEY_READ_MAX_CHARS`` characters plus
        a truncation marker when it is longer than that.
        """
        noise_patterns = (
            (r"<style[^>]*>.*?</style>", re.DOTALL | re.IGNORECASE),
            (r"<script[^>]*>.*?</script>", re.DOTALL | re.IGNORECASE),
            (r"<!--.*?-->", re.DOTALL),
        )
        cleaned = raw
        for pattern, flags in noise_patterns:
            cleaned = re.sub(pattern, "", cleaned, flags=flags)
        if len(cleaned) <= _JOURNEY_READ_MAX_CHARS:
            return cleaned
        return cleaned[:_JOURNEY_READ_MAX_CHARS] + "\n[…truncated for exploration]"
    @tool
    async def list_directory(path: str) -> str:  # noqa: F811
        """List files and folders in a local directory on the user's device.
        Returns a formatted listing of entries with name, type (file/directory),
        and full path.
        """
        target = _resolve_path(path, base_directory)
        response = await execute_on_client(action="list_directory", data={"path": target})
        entries: list[dict[str, Any]] = response.get("entries", [])
        if not entries:
            return f"Directory '{target}' is empty or does not exist."
        listing = "\n".join(
            f"- [{item.get('type', 'unknown')}] {item.get('name', '')}  ({item.get('path', '')})"
            for item in entries
        )
        return f"Directory listing for '{target}' ({len(entries)} entries):\n" + listing
    @tool
    async def read_file_content(path: str) -> str:  # noqa: F811
        """Read the text content of a local file on the user's device.
        Returns the file content as a string.  Large files may be truncated
        by the Electron client.
        """
        target = _resolve_path(path, base_directory)
        response = await execute_on_client(action="read_file_content", data={"path": target})
        text: str = response.get("content", "")
        if text:
            # Only non-empty content is compacted.
            return _compact_for_journey(text)
        return f"File '{target}' is empty or could not be read."
    @tool
    async def get_file_metadata(path: str) -> str:  # noqa: F811
        """Get metadata for a local file: size, creation date, modification date, extension.
        Returns a formatted summary of the file's metadata.
        """
        target = _resolve_path(path, base_directory)
        info = await execute_on_client(action="get_file_metadata", data={"path": target})
        summary_lines = [
            f"File: {info.get('name', target)}",
            f"  Extension: {info.get('extension', 'unknown')}",
            f"  Size: {info.get('size', 'unknown')} bytes",
            f"  Created: {info.get('createdAt', 'unknown')}",
            f"  Modified: {info.get('modifiedAt', 'unknown')}",
        ]
        return "\n".join(summary_lines)
    return [list_directory, read_file_content, get_file_metadata]
--- a/app/agents/folder_agent.py
+++ b/app/agents/folder_agent.py
@@ -0,0 +1,168 @@
 """Scoped file-read and search tools for the project folder feature."""
 from __future__ import annotations
 from langchain_core.tools import tool
 from app.core.folder_indexer import _extract_docx_text, _extract_pdf_text
 from app.core.ws_context import execute_on_client
 # Cap returned slice size to keep tool output under control.
 _MAX_RETURN_CHARS = 50_000
 _MAX_SEARCH_MATCHES = 20
 def _is_unsafe_path(rel: str) -> bool:
    if not rel:
        return True
    norm = rel.replace("\\", "/")
    if norm.startswith("/"):
        return True
    # Windows drive letter
    if len(rel) >= 2 and rel[1] == ":":
        return True
    parts = norm.split("/")
    return ".." in parts
 async def _fetch_file(project_id: str, relative_path: str, offset: int, length: int) -> dict:
    """Request a file read from the client and return its raw tool_result dict."""
    request = {
        "projectId": project_id,
        "relativePath": relative_path,
        "offset": offset,
        "length": length,
    }
    return await execute_on_client(action="read_project_folder_file", data=request)
 def _decode(result: dict) -> tuple[str, str, int]:
    """Decode a tool_result into (text, kind, total_size). For pdf/docx,
    extracts text from base64. For images, returns a placeholder string.
    For text, content is already a sliced utf-8 string.
    """
    kind = result.get("kind", "text")
    content = result.get("content", "") or ""
    total = int(result.get("totalSize", 0) or 0)
    if kind == "image":
        return ("[Image file — cannot be navigated as text. See manifest summary.]", kind, total)
    if kind == "pdf":
        return (_extract_pdf_text(content), kind, total)
    if kind == "docx":
        return (_extract_docx_text(content), kind, total)
    return (content, kind, total)
@tool
 async def read_project_folder_file(
    project_id: str,
    relative_path: str,
    offset: int = 0,
    length: int = _MAX_RETURN_CHARS,
 ) -> str:
    """Read a slice of a file inside the project's linked folder.
    Args:
        project_id: project ID.
        relative_path: path relative to the linked folder root.
        offset: char offset to start reading from (0 = beginning).
        length: max chars to return. Default 50000. Use smaller values to save tokens.
    Returns text content slice with a header showing position. Header tells you
    when more content is available; call again with the suggested next offset.
    For PDF / DOCX files the backend extracts text first, then applies offset/length
    on the extracted text. For images returns a placeholder; navigate with the
    manifest summary instead.
    """
    if _is_unsafe_path(relative_path):
        return "Access denied"
    result = await _fetch_file(project_id, relative_path, offset, length)
    text, kind, total_size = _decode(result)
    if not text and kind in ("missing", "error"):
        return f"File not found or unreadable: {relative_path}"
    if kind in ("pdf", "docx"):
        # Backend extracted full text — apply offset/length on chars.
        sliced = text[offset:offset + length]
        slice_end = min(offset + length, len(text))
        header = (
            f"[file={relative_path} kind={kind} offset={offset} end={slice_end} "
            f"totalChars={len(text)}]"
        )
        if slice_end < len(text):
            header += f"\n[More content available — call again with offset={slice_end}.]"
        return header + "\n" + sliced
    if kind == "text":
        slice_end = offset + len(text)
        header = (
            f"[file={relative_path} kind=text offset={offset} end={slice_end} "
            f"totalBytes={total_size}]"
        )
        if slice_end < total_size:
            header += f"\n[More content available — call again with offset={slice_end}.]"
        return header + "\n" + text
    # image or unknown
    return text
@tool
 async def search_project_folder_file(
    project_id: str,
    relative_path: str,
    query: str,
    context_lines: int = 3,
 ) -> str:
    """Search a project folder file for a query string (case-insensitive substring).
    Args:
        project_id: project ID.
        relative_path: path relative to the linked folder root.
        query: text to search for.
        context_lines: number of lines of context around each match (default 3).
    Returns matching line ranges with surrounding context and 1-based line numbers.
    Capped at 20 matches; if more exist the header shows the total.
    Works on text, code, markdown, PDF (extracted), and DOCX (extracted).
    Images and binary files are not searchable.
    """
    if _is_unsafe_path(relative_path):
        return "Access denied"
    if not query:
        return "Empty query."
    # For text we still need full file; pass length=very large.
    result = await _fetch_file(project_id, relative_path, offset=0, length=10_000_000)
    text, kind, _ = _decode(result)
    if not text and kind in ("missing", "error"):
        return f"File not found or unreadable: {relative_path}"
    if kind == "image":
        return "Cannot search inside images."
    lines = text.splitlines()
    q = query.lower()
    matches = [i for i, line in enumerate(lines) if q in line.lower()]
    if not matches:
        return f"No matches for '{query}' in {relative_path}."
    shown = matches[:_MAX_SEARCH_MATCHES]
    snippets: list[str] = []
    for i in shown:
        start = max(0, i - context_lines)
        end = min(len(lines), i + context_lines + 1)
        block = "\n".join(f"{n + 1:5d}: {lines[n]}" for n in range(start, end))
        snippets.append(block)
    header = f"[file={relative_path} matches={len(matches)} showing={len(shown)} query='{query}']"
    body = "\n---\n".join(snippets)
    return header + "\n" + body
 # The project-folder tools defined in this module: sliced read and search.
 FOLDER_TOOLS = [read_project_folder_file, search_project_folder_file]
--- a/app/agents/note_agent.py
+++ b/app/agents/note_agent.py
@@ -0,0 +1,206 @@
 """Note agent — Markdown note management (list, get, create, update, propose edit)."""
 from __future__ import annotations
 import asyncio
 import re
 from typing import Any
 from langchain_core.tools import tool
 from app.core.note_summarizer import generate_note_summary
 from app.core.ws_context import execute_on_client
 # Matches a hyphenated 8-4-4-4-12 hex UUID (either letter case) whose version
 # digit is 1-5 and whose variant digit is 8, 9, a or b.
 _UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
 )
 def _is_uuid(value: str) -> bool:
    """Return True when *value* matches ``_UUID_RE``."""
    return _UUID_RE.match(value) is not None
 def _fmt_summary(row: dict) -> str:
    summary = (row.get("aiSummary") or row.get("ai_summary") or "").strip()
    if summary:
        return f" — {summary}"
    snippet = (row.get("content") or "")[:120].replace("\n", " ").strip()
    return f" — {snippet}" if snippet else ""
@tool
 async def list_notes(project_id: str = "") -> str:
    """List notes with AI summaries, optionally scoped to a project by project_id.
    Returns id, title, and ai_summary for each note so you can decide which
    note to read in full with get_note before creating or updating.
    """
    normalized_project_id = project_id if (project_id and _is_uuid(project_id)) else ""
    result = await execute_on_client(
        action="select",
        table="notes",
        filters={"projectId": normalized_project_id or None},
    )
    rows = result.get("rows", [])
    if not rows:
        return "No notes found."
    lines = [f"  - [{r['id']}] {r['title']}{_fmt_summary(r)}" for r in rows]
    return f"Found {len(rows)} note(s):\n" + "\n".join(lines)
# Reads one note through the client-side "get" action and renders its title,
# id and full content; a falsy "row" in the response is reported as not found.
@tool
async def get_note(note_id: str) -> str:
    """Fetch a single note by its UUID to read its full Markdown content."""
    response = await execute_on_client(action="get", table="notes", data={"id": note_id})
    note = response.get("row")
    if note:
        return f"Note '{note['title']}' (id: {note['id']}):\n\n{note['content']}"
    return f"Note {note_id} not found."
# Strong references to the summary tasks started by create_note. The event loop
# keeps only weak references to tasks, so a fire-and-forget task whose handle is
# discarded may be garbage-collected before it finishes.
_CREATE_NOTE_SUMMARY_TASKS: set[asyncio.Task[None]] = set()


# Inserts the note through the client, then schedules AI-summary generation in
# the background without waiting for it.
@tool
async def create_note(
    title: str,
    content: str,
    project_id: str = "",
) -> str:
    """Create a new note.
    title: note heading (required)
    content: Markdown body text (required)
    project_id: optional UUID linking this note to a project
    """
    result = await execute_on_client(
        action="insert",
        table="notes",
        data={
            "title": title,
            "content": content,
            "projectId": project_id or None,
        },
    )
    row = result["row"]
    note_id: str = row["id"]
    # Generate summary asynchronously — fire-and-forget. The task is parked in
    # the module-level set until done so it cannot be collected mid-execution.
    summary_task = asyncio.create_task(_refresh_summary(note_id, title, content))
    _CREATE_NOTE_SUMMARY_TASKS.add(summary_task)
    summary_task.add_done_callback(_CREATE_NOTE_SUMMARY_TASKS.discard)
    return f"Note created: '{row['title']}' (id: {note_id})."
# Strong references to the summary tasks started by update_note. The event loop
# keeps only weak references to tasks, so a fire-and-forget task whose handle is
# discarded may be garbage-collected before it finishes.
_UPDATE_NOTE_SUMMARY_TASKS: set[asyncio.Task[None]] = set()


# Applies title/content changes through the client. Empty strings mean "leave
# unchanged". The AI summary is regenerated in the background only when the
# content changed.
@tool
async def update_note(
    note_id: str,
    title: str = "",
    content: str = "",
) -> str:
    """Update an existing note directly (no approval required).
    Use propose_note_edit instead when human review is needed.
    note_id: UUID of the note (required)
    If you need to preserve existing content, call get_note first.
    """
    updates: dict[str, Any] = {}
    if title:
        updates["title"] = title
    if content:
        updates["content"] = content
    if not updates:
        # Nothing to change: skip the client round trip instead of sending an
        # empty update and reporting it as a successful modification.
        return f"No changes provided for note {note_id}; nothing was updated."
    result = await execute_on_client(
        action="update",
        table="notes",
        data={"id": note_id, "updates": updates},
    )
    row = result["row"]
    if content:
        new_title = title or row.get("title", "")
        # Keep a reference until completion so the task is not collected early.
        summary_task = asyncio.create_task(_refresh_summary(note_id, new_title, content))
        _UPDATE_NOTE_SUMMARY_TASKS.add(summary_task)
        summary_task.add_done_callback(_UPDATE_NOTE_SUMMARY_TASKS.discard)
    return f"Note updated: '{row['title']}' (id: {row['id']})."
# Validates edit_type locally, then forwards the proposal to the client through
# the "propose_note_edit" action. Empty optional strings are sent as None.
@tool
async def propose_note_edit(
    note_id: str,
    edit_type: str,
    proposed_content: str,
    reasoning: str = "",
    anchor_before: str = "",
    anchor_text: str = "",
    agent_id: str = "",
    run_id: str = "",
) -> str:
    """Propose an AI edit to an existing note, pending human approval.
    Use this instead of update_note when review_required is true.
    The user will see the proposal highlighted before it is merged.
    note_id: UUID of the target note (required)
    edit_type: 'append' | 'insert' | 'replace'
      - append: adds proposed_content at the end of the note
      - insert: inserts proposed_content immediately after anchor_before text
      - replace: replaces the first occurrence of anchor_text with proposed_content
    proposed_content: the new Markdown text to add or substitute (required)
    reasoning: brief explanation shown to the user (recommended)
    anchor_before: for 'insert' — the text snippet that precedes the insertion point
    anchor_text: for 'replace' — the exact text to be replaced
    agent_id: agent identifier (for traceability)
    run_id: run identifier (for traceability)
    """
    allowed_types = ("append", "insert", "replace")
    if edit_type not in allowed_types:
        return f"Invalid edit_type '{edit_type}'. Use 'append', 'insert', or 'replace'."
    payload = {
        "noteId": note_id,
        "type": edit_type,
        "proposedContent": proposed_content,
        "reasoning": reasoning or None,
        "anchorBefore": anchor_before or None,
        "anchorText": anchor_text or None,
        "agentId": agent_id or None,
        "runId": run_id or None,
    }
    response = await execute_on_client(action="propose_note_edit", data=payload)
    # "?" stands in when the client response carries no proposal id.
    proposal_id = response.get("id", "?")
    confirmation = f"Edit proposal created (id: {proposal_id}) for note {note_id}. "
    return confirmation + "Status: pending user approval."
# Issues a client-side "delete" on the notes table; the client's response is
# not inspected, so the confirmation is returned unconditionally.
@tool
async def delete_note(note_id: str) -> str:
    """Delete a note permanently by its UUID."""
    target = {"id": note_id}
    await execute_on_client(action="delete", table="notes", data=target)
    return f"Note {note_id} deleted."
 async def _refresh_summary(note_id: str, title: str, content: str) -> None:
    """Generate and persist the AI summary for a note.  Fire-and-forget.

    Asks ``generate_note_summary`` for a summary and, when one is returned,
    writes ``aiSummary`` plus ``aiSummaryUpdatedAt`` (current time in epoch
    milliseconds) back to the note through the client. Exceptions never
    propagate, because this coroutine runs as a background task, but they are
    logged so that a failed client write is not lost silently.
    """
    import logging
    import time

    try:
        summary = await generate_note_summary(title, content)
        if not summary:
            return
        await execute_on_client(
            action="update",
            table="notes",
            data={
                "id": note_id,
                "updates": {
                    "aiSummary": summary,
                    "aiSummaryUpdatedAt": int(time.time() * 1000),
                },
            },
        )
    except Exception:
        # Best-effort background job: swallow the error but leave a trace.
        logging.getLogger(__name__).warning(
            "note_agent: summary refresh failed for note %s", note_id, exc_info=True
        )
 # Every note tool defined in this module, including the mutating ones
 # (create_note, update_note, propose_note_edit, delete_note).
 NOTE_TOOLS: list[Any] = [
    list_notes,
    get_note,
    create_note,
    update_note,
    propose_note_edit,
    delete_note,
 ]
 # Subset limited to the tools that only issue "select"/"get" actions.
 NOTE_READ_TOOLS: list[Any] = [
    list_notes,
    get_note,
 ]
--- a/shared/agents/project_agent.py
+++ b/shared/agents/project_agent.py
@@ -1,7 +1,4 @@
-"""Project agent — full lifecycle management (list, get, create, update, archive, delete).
+"""Project agent — full lifecycle management (list, get, create, update, archive, delete)."""
 Shared tool definitions used by both Chat and Batch Agent services.
 """
 from __future__ import annotations
@@ -9,23 +6,7 @@ from typing import Any
 from langchain_core.tools import tool
-from shared.ws_context import execute_on_client
+from app.core.ws_context import execute_on_client
 PROJECT_SYSTEM_PROMPT = (
    "You are a project management assistant. You help users create, find,\n"
    "update, and archive projects in their workspace.\n\n"
    "Rules:\n"
    "  - status must be one of: active, archived\n"
    "  - client_id is optional; link to a client only when explicitly mentioned\n"
    "  - ai_summary is populated only when the user asks for a project summary;\n"
    "    derive it from context data — do not fabricate content\n"
    "  - Use list_projects for scoped queries; list_all_projects only when the\n"
    "    user wants a complete cross-client view including archived projects\n"
    "  - get_project requires a project UUID; resolve the ID first by calling\n"
    "    list_projects if you only have a project name\n"
    "  - Prefer archiving (update_project status=archived) over deletion;\n"
    "    only call delete_project when the user explicitly confirms deletion."
 )
@tool
@@ -144,3 +125,9 @@ PROJECT_TOOLS: list[Any] = [
    update_project,
    delete_project,
 ]
 PROJECT_READ_TOOLS: list[Any] = [
    list_projects,
    list_all_projects,
    get_project,
 ]
--- a/app/agents/relations_agent.py
+++ b/app/agents/relations_agent.py
@@ -0,0 +1,63 @@
 """Relations agent — read-only tool wrapping MemoryMiddleware.query_relations."""
 from __future__ import annotations
 from typing import Any
 from langchain_core.tools import tool
 from app.core.memory_middleware import MemoryMiddleware
 from app.db import async_session
 # user_id is injected at tool-factory time by _brief_research_tools(); it is not a module-level global.
 # Each tool closure captures the user_id bound at factory time.
 def make_query_relations_tool(user_id: str, trace_id: str | None = None) -> Any:
    """Build a ``query_relations`` tool whose lookups are scoped to *user_id*.

    *trace_id* only tags the log line emitted on each call ("-" when absent).
    The returned object is whatever the ``@tool`` decorator produces for the
    inner coroutine.
    """
    import logging
    log = logging.getLogger(__name__)
    trace_label = trace_id or "-"
    @tool
    async def query_relations(
        subject_label: str = "",
        predicate: str = "",
        object_label: str = "",
        limit: int = 10,
    ) -> str:
        """Query the relational memory graph for entity relationships.
        Returns rows where subject ↔ predicate ↔ object match the given filters.
        All parameters are optional — omit to retrieve all relations up to limit.
        subject_label: entity label on the left side (e.g. a client name, "Acme Corp").
        predicate: relationship type (e.g. "mentioned_in", "works_at", "related_to").
        object_label: entity label on the right side (e.g. a project name, "Website Redesign").
        limit: max rows to return (default 10).
        """
        log.info(
            "relations_agent: query_relations trace=%s user=%s subject=%r predicate=%r object=%r",
            trace_label, user_id, subject_label, predicate, object_label,
        )
        # Empty-string filters are passed on as None.
        async with async_session() as db:
            relations = await MemoryMiddleware(db).query_relations(
                user_id=user_id,
                subject=subject_label or None,
                predicate=predicate or None,
                object_=object_label or None,
                limit=limit,
            )
        if not relations:
            return "No relational memory entries found for the given filters."
        rendered = []
        for rel in relations:
            entry = f"- {rel.subject_label} —[{rel.predicate}]→ {rel.object_label}"
            if rel.confidence is not None:
                entry += f" (confidence: {rel.confidence:.2f})"
            rendered.append(entry)
        return f"Found {len(relations)} relation(s):\n" + "\n".join(rendered)
    return query_relations
--- a/app/agents/task_agent.py
+++ b/app/agents/task_agent.py
@@ -0,0 +1,358 @@
 """Task agent — full CRUD for tasks and task comments."""
 from __future__ import annotations
 from datetime import datetime, timezone
 import re
 from typing import Any
 from langchain_core.tools import tool
 from app.core.ws_context import execute_on_client
 _UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
 )
 def _is_uuid(value: str) -> bool:
    return bool(_UUID_RE.match(value))
 # ── Task tools ────────────────────────────────────────────────────────
# Translates the keyword filters into the camelCase filter dict used by the
# client-side "select" action on the tasks table and renders the returned rows.
@tool
async def list_tasks(
    project_id: str = "",
    status: str = "",
    priority: str = "",
    assignee: str = "",
    search: str = "",
    order_by: str = "",
    order_dir: str = "",
    due_date_from: int = -1,
    due_date_to: int = -1,
    created_at_from: int = -1,
    created_at_to: int = -1,
    completed_at_from: int = -1,
    completed_at_to: int = -1,
    is_ai_suggested: int = -1,
    limit: int = 50,
    offset: int = 0,
) -> str:
    """List tasks with optional filters. Returns up to `limit` results (default 50).
    project_id: UUID of the project to scope results to.
    status: filter by status — todo | in_progress | done.
    priority: filter by priority — high | medium | low.
    assignee: substring to match against assignee names. OMIT unless the user explicitly
              names a person or refers to themselves ("my tasks", "assigned to me", "mine").
              Do NOT default to the current user.
    search: substring search across title and description.
    order_by: sort field — dueDate | priority | createdAt | completedAt.
    order_dir: asc (default) | desc.
    due_date_from / due_date_to: ms epoch range for dueDate. Use -1 to omit.
    created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit.
    completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit.
    is_ai_suggested: 0 or 1 to filter by AI-suggested flag; -1 = any.
    limit: max rows to return (default 50). Use with offset to paginate.
    offset: skip first N rows (default 0).
    Tip — combine *_from and *_to for a closed range; pass only one for open-ended.
    Tip — prefer count_tasks for "how many" questions to avoid listing rows.
    Tip — for natural-language windows ("today", "tomorrow", "this week", "last month", etc.)
    take due_date_from / due_date_to verbatim from the DATE CONTEXT block in the system prompt;
    do not compute boundaries from the current UTC instant.
    """
    # A project_id that is empty or not UUID-shaped means "no project filter".
    scoped_project = project_id if project_id and _is_uuid(project_id) else None
    filters: dict[str, Any] = {
        "projectId": scoped_project,
        "status": status or None,
        "priority": priority or None,
        "search": search or None,
        "orderBy": order_by or None,
        "orderDir": order_dir or None,
        "limit": limit,
        "offset": offset,
    }
    if assignee:
        filters["assignee"] = assignee
    # -1 is the "not provided" sentinel for every numeric filter below; only
    # the ones actually supplied are added, in this fixed order.
    sentinel_filters = (
        ("dueDateFrom", due_date_from),
        ("dueDateTo", due_date_to),
        ("createdAtFrom", created_at_from),
        ("createdAtTo", created_at_to),
        ("completedAtFrom", completed_at_from),
        ("completedAtTo", completed_at_to),
        ("isAiSuggested", is_ai_suggested),
    )
    for key, value in sentinel_filters:
        if value != -1:
            filters[key] = value
    response = await execute_on_client(action="select", table="tasks", filters=filters)
    tasks = response.get("rows", [])
    if not tasks:
        return "No tasks found matching the given filters."
    rendered = []
    for task in tasks:
        rendered.append(
            f"- {task['title']} (status: {task['status']}, priority: {task['priority']}, "
            f"dueDate: {task.get('dueDate')}, completedAt: {task.get('completedAt')}, "
            f"projectId: {task.get('projectId')}, id: {task['id']})"
        )
    return f"Found {len(tasks)} task(s):\n" + "\n".join(rendered)
# Builds the same filter dict as list_tasks (minus ordering and paging) and
# asks the client for a "count" on the tasks table.
@tool
async def count_tasks(
    project_id: str = "",
    status: str = "",
    priority: str = "",
    assignee: str = "",
    search: str = "",
    due_date_from: int = -1,
    due_date_to: int = -1,
    created_at_from: int = -1,
    created_at_to: int = -1,
    completed_at_from: int = -1,
    completed_at_to: int = -1,
    is_ai_suggested: int = -1,
) -> str:
    """Count tasks matching the given filters without returning rows.
    Use this instead of list_tasks for "how many" questions — it is much cheaper.
    Same filter parameters as list_tasks (no limit/offset/order_by needed).
    assignee: OMIT unless the user explicitly names a person or refers to themselves
              ("my tasks"). Do NOT default to the current user.
    due_date_from / due_date_to: ms epoch range for dueDate. Use -1 to omit.
    created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit.
    completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit.
    Tip — for natural-language windows take due_date_from / due_date_to from the DATE CONTEXT block;
    do not compute boundaries from the current UTC instant.
    """
    # A project_id that is empty or not UUID-shaped means "no project filter".
    scoped_project = project_id if project_id and _is_uuid(project_id) else None
    filters: dict[str, Any] = {
        "projectId": scoped_project,
        "status": status or None,
        "priority": priority or None,
        "search": search or None,
    }
    if assignee:
        filters["assignee"] = assignee
    # -1 is the "not provided" sentinel for every numeric filter below.
    sentinel_filters = (
        ("dueDateFrom", due_date_from),
        ("dueDateTo", due_date_to),
        ("createdAtFrom", created_at_from),
        ("createdAtTo", created_at_to),
        ("completedAtFrom", completed_at_from),
        ("completedAtTo", completed_at_to),
        ("isAiSuggested", is_ai_suggested),
    )
    for key, value in sentinel_filters:
        if value != -1:
            filters[key] = value
    response = await execute_on_client(action="count", table="tasks", filters=filters)
    total = response.get("count", 0)
    return f"Task count: {total}"
# Inserts a task through the client. An empty description, a due_date of 0 and
# an empty project_id are sent as None; assignees is forwarded as received.
@tool
async def create_task(
    title: str,
    description: str = "",
    status: str = "todo",
    priority: str = "medium",
    assignees: str = "[]",
    due_date: int = 0,
    project_id: str = "",
    is_ai_suggested: int = 0,
) -> str:
    """Create a new task.
    title: task title (required)
    description: optional details
    status: todo | in_progress | done  (default: todo)
    priority: high | medium | low  (default: medium)
    assignees: JSON-encoded array of assignee names, e.g. '["Alice"]'
    due_date: Unix timestamp in milliseconds; 0 means no due date
    project_id: optional UUID of the parent project
    is_ai_suggested: 1 if proactively suggested, 0 if user-requested
    completedAt is set automatically when status is 'done'.
    """
    payload = {
        "title": title,
        "description": description or None,
        "status": status,
        "priority": priority,
        "assignee": assignees,
        "dueDate": due_date or None,
        "projectId": project_id or None,
        "isAiSuggested": is_ai_suggested,
    }
    response = await execute_on_client(action="insert", table="tasks", data=payload)
    created = response["row"]
    headline = f"Task created: '{created['title']}' "
    details = (
        f"(id: {created['id']}, status: {created['status']}, "
        f"priority: {created['priority']}, projectId: {created.get('projectId')})"
    )
    return headline + details
# Sends only the fields the caller supplied. Empty strings mean "leave
# unchanged"; due_date uses -1 for unchanged and 0 to clear (sent as None).
@tool
async def update_task(
    task_id: str,
    title: str = "",
    description: str = "",
    status: str = "",
    priority: str = "",
    assignees: str = "",
    due_date: int = -1,
    project_id: str = "",
) -> str:
    """Update fields on an existing task. Only pass fields you want to change.
    task_id: the task's UUID (required)
    due_date: -1 means unchanged; 0 clears the due date; any positive value sets it
    completedAt is managed automatically:
      - setting status to 'done' records the current timestamp
      - changing status away from 'done' clears completedAt
    """
    updates: dict[str, Any] = {}
    if title:
        updates["title"] = title
    if description:
        updates["description"] = description
    if status:
        updates["status"] = status
    if priority:
        updates["priority"] = priority
    if assignees:
        updates["assignee"] = assignees
    if due_date != -1:
        updates["dueDate"] = due_date or None
    if project_id:
        updates["projectId"] = project_id
    if not updates:
        # Nothing to change: skip the client round trip instead of sending an
        # empty update and reporting it as a successful modification.
        return f"No changes provided for task {task_id}; nothing was updated."
    result = await execute_on_client(
        action="update",
        table="tasks",
        data={"id": task_id, "updates": updates},
    )
    row = result["row"]
    return f"Task updated: '{row['title']}' (id: {row['id']}, status: {row['status']}, projectId: {row.get('projectId')})"
# Issues a client-side "delete" on the tasks table; the client's response is
# not inspected, so the confirmation is returned unconditionally.
@tool
async def delete_task(task_id: str) -> str:
    """Delete a task permanently by its UUID."""
    target = {"id": task_id}
    await execute_on_client(action="delete", table="tasks", data=target)
    return f"Task {task_id} deleted."
# Computes the [local midnight, next local midnight) window for "today" in the
# user's timezone and selects the tasks whose dueDate falls inside it.
@tool
async def list_tasks_due_today(user_timezone: str = "UTC", include_done: bool = False) -> str:
    """List all tasks whose due date falls on today's date.
    user_timezone: IANA timezone name (e.g. 'Europe/Rome', 'America/New_York').
    Always pass the user's timezone so 'today' is computed in their local time.
    include_done: set True to also include already-completed tasks due today (default False).
    """
    from datetime import timedelta

    try:
        from zoneinfo import ZoneInfo
        tz = ZoneInfo(user_timezone or "UTC")
    except Exception:
        # Unknown or malformed zone name: fall back to UTC rather than failing
        # the whole tool call.
        tz = timezone.utc
    now_local = datetime.now(tz=tz)
    day_start = datetime(now_local.year, now_local.month, now_local.day, tzinfo=tz)
    # Adding a timedelta to an aware datetime is wall-clock arithmetic, so this
    # is the next local midnight even on 23- or 25-hour DST transition days
    # (a fixed +86_400_000 ms would be off by an hour there).
    day_end = day_start + timedelta(days=1)
    start_ms = int(day_start.timestamp() * 1000)
    end_ms = int(day_end.timestamp() * 1000) - 1
    filters: dict[str, Any] = {"dueDateFrom": start_ms, "dueDateTo": end_ms}
    result = await execute_on_client(
        action="select",
        table="tasks",
        filters=filters,
    )
    rows = result.get("rows", [])
    if not include_done:
        # Exclude only completed tasks. Filtering on status == "todo" would
        # also hide in_progress tasks that are still due today.
        rows = [r for r in rows if r.get("status") != "done"]
    if not rows:
        return "No tasks are due today."
    lines = [
        f"- {r['title']} (priority: {r['priority']}, status: {r['status']}, "
        f"projectId: {r.get('projectId')}, id: {r['id']})"
        for r in rows
    ]
    return f"Tasks due today ({len(rows)}):\n" + "\n".join(lines)
 # ── Task comment tools ────────────────────────────────────────────────
# Selects the taskComments rows for one task and renders author, text and id.
@tool
async def list_task_comments(task_id: str) -> str:
    """List all comments on a task by its UUID."""
    response = await execute_on_client(
        action="select",
        table="taskComments",
        filters={"taskId": task_id},
    )
    comments = response.get("rows", [])
    if not comments:
        return f"No comments found for task {task_id}."
    rendered = "\n".join(
        f"- [{c['author']}]: {c['content']} (id: {c['id']})" for c in comments
    )
    return f"Found {len(comments)} comment(s):\n" + rendered
# Inserts a comment through the client and confirms it using the values echoed
# back in the response row, falling back to the arguments when a field is absent.
@tool
async def add_task_comment(task_id: str, author: str, content: str) -> str:
    """Add a comment to a task.
    task_id: UUID of the task to comment on
    author: name or ID of the comment author
    content: comment text
    """
    result = await execute_on_client(
        action="insert",
        table="taskComments",
        data={"taskId": task_id, "author": author, "content": content},
    )
    # "row" may be missing or explicitly None; treat both as an empty row so
    # the fallbacks below apply instead of raising AttributeError.
    row = result.get("row") or {}
    row_author = row.get("author", author)
    row_task_id = row.get("taskId") or row.get("task_id") or task_id
    row_comment_id = row.get("id", "unknown")
    return f"Comment added by {row_author} on task {row_task_id} (comment id: {row_comment_id})."
# Issues a client-side "delete" on the taskComments table; the client's
# response is not inspected, so the confirmation is returned unconditionally.
@tool
async def delete_task_comment(comment_id: str) -> str:
    """Delete a task comment by its UUID."""
    target = {"id": comment_id}
    await execute_on_client(action="delete", table="taskComments", data=target)
    return f"Comment {comment_id} deleted."
 # ── Agent ─────────────────────────────────────────────────────────────
 # Every task and task-comment tool defined in this module, including the
 # mutating ones (create/update/delete task, add/delete comment).
 TASK_TOOLS: list[Any] = [
    list_tasks,
    count_tasks,
    create_task,
    update_task,
    delete_task,
    list_tasks_due_today,
    list_task_comments,
    add_task_comment,
    delete_task_comment,
 ]
 # Subset limited to the tools that only issue "select"/"count" actions.
 TASK_READ_TOOLS: list[Any] = [
    list_tasks,
    count_tasks,
    list_tasks_due_today,
    list_task_comments,
 ]
--- a/app/agents/timeline_agent.py
+++ b/app/agents/timeline_agent.py
@@ -0,0 +1,270 @@
 """Timeline agent — project milestone management (list, create, update, delete)."""
 from __future__ import annotations
 import re
 from datetime import datetime, timezone
 from typing import Any
 from langchain_core.tools import tool
 from app.core.ws_context import execute_on_client
 _UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
 )
 def _is_uuid(value: str) -> bool:
    return bool(_UUID_RE.match(value))
# Translates the keyword filters into the camelCase filter dict used by the
# client-side "select" action on the timelines table and renders the rows.
@tool
async def list_timelines(
    project_id: str = "",
    type: str = "",
    is_completed: int = -1,
    is_ai_suggested: int = -1,
    order_by: str = "",
    order_dir: str = "",
    date_from: int = -1,
    date_to: int = -1,
    created_at_from: int = -1,
    created_at_to: int = -1,
    completed_at_from: int = -1,
    completed_at_to: int = -1,
    limit: int = 50,
    offset: int = 0,
) -> str:
    """List timeline events (milestones, checkpoints, activities) with optional filters.
    project_id: UUID to scope results to a specific project.
    type: filter by event type — milestone | checkpoint | activity.
    is_completed: 0 = incomplete only, 1 = completed only, -1 = any (default).
    is_ai_suggested: 0 or 1 to filter by AI-suggested flag; -1 = any.
    order_by: sort field — date (default) | createdAt | completedAt.
    order_dir: asc (default) | desc.
    date_from / date_to: ms epoch range for the event date. Use -1 to omit.
    created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit.
    completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit.
    limit: max rows to return (default 50). Use with offset to paginate.
    offset: skip first N rows (default 0).
    Tip — combine *_from and *_to for a closed range; pass only one for open-ended.
    Tip — prefer count_timelines for "how many" questions to avoid listing rows.
    Tip — for natural-language windows ("today", "this week", "last month", etc.)
    take date_from / date_to verbatim from the DATE CONTEXT block in the system prompt;
    do not compute boundaries from the current UTC instant.
    """
    # A project_id that is empty or not UUID-shaped means "no project filter".
    scoped_project = project_id if project_id and _is_uuid(project_id) else None
    filters: dict[str, Any] = {
        "projectId": scoped_project,
        "orderBy": order_by or None,
        "orderDir": order_dir or None,
        "limit": limit,
        "offset": offset,
    }
    if type:
        filters["type"] = type
    # -1 is the "not provided" sentinel for every numeric filter below.
    sentinel_filters = (
        ("isCompleted", is_completed),
        ("isAiSuggested", is_ai_suggested),
        ("dateFrom", date_from),
        ("dateTo", date_to),
        ("createdAtFrom", created_at_from),
        ("createdAtTo", created_at_to),
        ("completedAtFrom", completed_at_from),
        ("completedAtTo", completed_at_to),
    )
    for key, value in sentinel_filters:
        if value != -1:
            filters[key] = value
    response = await execute_on_client(action="select", table="timelines", filters=filters)
    events = response.get("rows", [])
    if not events:
        return "No timeline events found."
    rendered = []
    for event in events:
        rendered.append(
            f"- {event['title']} (date: {event['date']}, type: {event.get('type')}, "
            f"completed: {bool(event.get('isCompleted'))}, completedAt: {event.get('completedAt')}, "
            f"projectId: {event.get('projectId')}, id: {event['id']})"
        )
    return f"Found {len(events)} timeline event(s):\n" + "\n".join(rendered)
# Builds the same filter dict as list_timelines (minus ordering and paging)
# and asks the client for a "count" on the timelines table.
@tool
async def count_timelines(
    project_id: str = "",
    type: str = "",
    is_completed: int = -1,
    is_ai_suggested: int = -1,
    date_from: int = -1,
    date_to: int = -1,
    created_at_from: int = -1,
    created_at_to: int = -1,
    completed_at_from: int = -1,
    completed_at_to: int = -1,
) -> str:
    """Count timeline events matching the given filters without returning rows.
    Use this instead of list_timelines for "how many" questions — it is much cheaper.
    Same filter parameters as list_timelines (no limit/offset/order_by needed).
    date_from / date_to: ms epoch range for the event date. Use -1 to omit.
    completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit.
    Tip — for natural-language windows take date_from / date_to from the DATE CONTEXT block;
    do not compute boundaries from the current UTC instant.
    """
    # A project_id that is empty or not UUID-shaped means "no project filter".
    scoped_project = project_id if project_id and _is_uuid(project_id) else None
    filters: dict[str, Any] = {"projectId": scoped_project}
    if type:
        filters["type"] = type
    # -1 is the "not provided" sentinel for every numeric filter below.
    sentinel_filters = (
        ("isCompleted", is_completed),
        ("isAiSuggested", is_ai_suggested),
        ("dateFrom", date_from),
        ("dateTo", date_to),
        ("createdAtFrom", created_at_from),
        ("createdAtTo", created_at_to),
        ("completedAtFrom", completed_at_from),
        ("completedAtTo", completed_at_to),
    )
    for key, value in sentinel_filters:
        if value != -1:
            filters[key] = value
    response = await execute_on_client(action="count", table="timelines", filters=filters)
    total = response.get("count", 0)
    return f"Timeline event count: {total}"
# Inserts a timeline event through the client; all arguments are forwarded
# as given under their camelCase keys.
@tool
async def create_timeline(
    project_id: str,
    title: str,
    date: int,
    type: str = "milestone",
    is_completed: int = 0,
    is_ai_suggested: int = 0,
) -> str:
    """Create a project timeline event.
    project_id: REQUIRED UUID of the parent project
    title: descriptive name for the event
    date: Unix timestamp in milliseconds for the event date
    type: milestone (default) | checkpoint | activity
    is_completed: 1 if already completed, 0 if not (default 0)
    is_ai_suggested: 1 if proactively suggested, 0 if user-requested
    completedAt is set automatically when is_completed is 1.
    """
    payload = {
        "projectId": project_id,
        "title": title,
        "date": date,
        "type": type,
        "isCompleted": is_completed,
        "isAiSuggested": is_ai_suggested,
    }
    response = await execute_on_client(action="insert", table="timelines", data=payload)
    created = response["row"]
    return (
        f"Timeline event created: '{created['title']}' "
        f"(id: {created['id']}, date: {created['date']}, type: {created.get('type')})"
    )
# Sends only the fields the caller supplied. An empty title and -1 for
# date / is_completed mean "leave unchanged".
@tool
async def update_timeline(
    timeline_id: str,
    title: str = "",
    date: int = -1,
    is_completed: int = -1,
) -> str:
    """Update a timeline event. Only pass fields that should change.
    timeline_id: UUID of the event (required)
    date: -1 means unchanged; any other value sets the new date (ms timestamp)
    is_completed: 0 = mark incomplete, 1 = mark complete, -1 = unchanged
    completedAt is managed automatically:
      - setting is_completed to 1 records the current timestamp
      - setting is_completed to 0 clears completedAt
    """
    updates: dict[str, Any] = {}
    if title:
        updates["title"] = title
    if date != -1:
        updates["date"] = date
    if is_completed != -1:
        updates["isCompleted"] = is_completed
    if not updates:
        # Nothing to change: skip the client round trip instead of sending an
        # empty update and reporting it as a successful modification.
        return f"No changes provided for timeline event {timeline_id}; nothing was updated."
    result = await execute_on_client(
        action="update",
        table="timelines",
        data={"id": timeline_id, "updates": updates},
    )
    row = result["row"]
    return f"Timeline event updated: '{row['title']}' (id: {row['id']})"
# Issues a client-side "delete" on the timelines table; the client's response
# is not inspected, so the confirmation is returned unconditionally.
@tool
async def delete_timeline(timeline_id: str) -> str:
    """Delete a timeline event permanently by its UUID."""
    target = {"id": timeline_id}
    await execute_on_client(action="delete", table="timelines", data=target)
    return f"Timeline event {timeline_id} deleted."
# Computes the [local midnight, next local midnight) window for "today" in the
# user's timezone and selects the timeline events whose date falls inside it.
@tool
async def list_timelines_today(user_timezone: str = "UTC", include_completed: bool = True) -> str:
    """List all timeline events whose date falls on today.
    user_timezone: IANA timezone name (e.g. 'Europe/Rome', 'America/New_York').
    Always pass the user's timezone so 'today' is computed in their local time.
    include_completed: set False to exclude already-completed events (default True).
    """
    from datetime import timedelta

    try:
        from zoneinfo import ZoneInfo
        tz = ZoneInfo(user_timezone or "UTC")
    except Exception:
        # Unknown or malformed zone name: fall back to UTC rather than failing
        # the whole tool call.
        tz = timezone.utc
    now_local = datetime.now(tz=tz)
    day_start = datetime(now_local.year, now_local.month, now_local.day, tzinfo=tz)
    # Adding a timedelta to an aware datetime is wall-clock arithmetic, so this
    # is the next local midnight even on 23- or 25-hour DST transition days
    # (a fixed +86_400_000 ms would be off by an hour there).
    day_end = day_start + timedelta(days=1)
    start_ms = int(day_start.timestamp() * 1000)
    end_ms = int(day_end.timestamp() * 1000) - 1
    filters: dict[str, Any] = {"dateFrom": start_ms, "dateTo": end_ms}
    if not include_completed:
        filters["isCompleted"] = 0
    result = await execute_on_client(
        action="select",
        table="timelines",
        filters=filters,
    )
    rows = result.get("rows", [])
    if not rows:
        return "No timeline events today."
    lines = [
        f"- {r['title']} (date: {r['date']}, type: {r.get('type')}, "
        f"completed: {bool(r.get('isCompleted'))}, projectId: {r.get('projectId')}, id: {r['id']})"
        for r in rows
    ]
    return f"Timeline events today ({len(rows)}):\n" + "\n".join(lines)
 # Every timeline tool defined in this module, including the mutating ones
 # (create_timeline, update_timeline, delete_timeline).
 TIMELINE_TOOLS: list[Any] = [
    list_timelines,
    count_timelines,
    list_timelines_today,
    create_timeline,
    update_timeline,
    delete_timeline,
 ]
 # Subset limited to the tools that only issue "select"/"count" actions.
 TIMELINE_READ_TOOLS: list[Any] = [
    list_timelines,
    count_timelines,
    list_timelines_today,
 ]
--- a/services/batch-agent/app/init.py
+++ b/services/batch-agent/app/init.py
--- a/app/api/deps.py
+++ b/app/api/deps.py
@@ -0,0 +1,14 @@
 """Shared FastAPI dependencies.
 ``get_current_user`` and ``oauth2_scheme`` live in ``app.api.middleware.auth``
 (the canonical location per Step 9).  This module re-exports them so that all
 existing route imports (``from app.api.deps import get_current_user``) continue
 to work without modification.
 ``get_current_user`` uses the JWT for identity and expiry only; the current tier
 is fetched live from the ``subscriptions`` table rather than read from the JWT payload.
 """
 from app.api.middleware.auth import get_current_user, oauth2_scheme  # noqa: F401
 __all__ = ["get_current_user", "oauth2_scheme"]
--- a/app/api/middleware/init.py
+++ b/app/api/middleware/init.py
@@ -0,0 +1,19 @@
 """API middleware package.
 Exports the three middleware components introduced in Step 9:
  - Auth:        ``get_current_user`` FastAPI dependency + ``oauth2_scheme``
  - Rate limit:  ``TierRateLimitMiddleware`` + ``limiter`` (slowapi Limiter)
  - Sanitizer:   ``SanitizerMiddleware``
 """
 from app.api.middleware.auth import get_current_user, oauth2_scheme
 from app.api.middleware.rate_limit import TierRateLimitMiddleware, limiter
 from app.api.middleware.sanitizer import SanitizerMiddleware
 # Public surface of the middleware package: the auth dependency and scheme,
 # the rate-limit middleware and slowapi limiter, and the response sanitizer.
 __all__ = [
    "get_current_user",
    "oauth2_scheme",
    "TierRateLimitMiddleware",
    "limiter",
    "SanitizerMiddleware",
 ]
--- a/app/api/middleware/auth.py
+++ b/app/api/middleware/auth.py
@@ -0,0 +1,103 @@
 """Auth middleware — JWT validation dependency.
 ``get_current_user`` is the FastAPI dependency used by all protected routes.
 It decodes the Bearer JWT (identity + expiry), then fetches the current tier
 from the ``subscriptions`` table so that tier changes take effect immediately
 without requiring token re-issue.
 Exempt routes (no JWT required):
  - POST /api/v1/auth/register
  - POST /api/v1/auth/login
  - POST /api/v1/billing/webhook
 """
 from __future__ import annotations
 from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import JWTError, jwt
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.config.settings import settings
 from app.db import get_session
 from app.schemas import UserProfile
 # Bearer-token scheme used as a dependency by get_current_user; tokenUrl
 # names the login route that issues the tokens.
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/auth/login")
 async def get_current_user(
    token: str = Depends(oauth2_scheme),
    db: AsyncSession = Depends(get_session),
 ) -> UserProfile:
    """Validate a Bearer JWT and return the authenticated user.

    The JWT supplies identity (``sub``, ``email``) and expiry only.  The tier
    is read from the ``subscriptions`` table on every call; when no row exists
    it defaults to ``'power'`` if ``settings.ENV == "dev"`` and to ``'free'``
    otherwise.  Profile fields come from the ``users`` row, and core memory is
    loaded best-effort (an empty dict is returned when that load fails).

    Args:
        token: Bearer token extracted by ``oauth2_scheme``.
        db: Async database session.

    Returns:
        The populated ``UserProfile``.

    Raises:
        HTTPException: 401 when the token cannot be decoded or lacks the
            ``sub`` or ``email`` claim.
    """
    import logging  # noqa: PLC0415

    credentials_exc = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(
            token, settings.JWT_SECRET, algorithms=[settings.JWT_ALGORITHM]
        )
    except JWTError:
        # The decoding failure is an implementation detail; do not chain it.
        raise credentials_exc from None
    user_id: str | None = payload.get("sub")
    email: str | None = payload.get("email")
    if not user_id or not email:
        raise credentials_exc

    # Live tier lookup — subscription row is the authoritative source.
    # In dev, fall back to 'power' (unlimited) so quota limits don't
    # block local development when no Stripe subscription exists.
    from app.models import Subscription, User  # noqa: PLC0415

    result = await db.execute(
        select(Subscription.tier).where(Subscription.user_id == user_id)
    )
    default_tier = "power" if settings.ENV == "dev" else "free"
    tier: str = result.scalar_one_or_none() or default_tier

    # Fetch name/surname/avatar_url/onboarding_completed_at/password_hash from user row.
    # NOTE(review): a valid token whose user row no longer exists still yields
    # a profile (with empty name fields) — confirm this is intended.
    user_result = await db.execute(
        select(
            User.name, User.surname, User.avatar_url, User.onboarding_completed_at,
            User.password_hash,
        ).where(User.id == user_id)
    )
    user_row = user_result.one_or_none()

    # Convert onboarding_completed_at to epoch ms (int) or None.
    onboarding_ms: int | None = None
    if user_row and user_row.onboarding_completed_at is not None:
        onboarding_ms = int(user_row.onboarding_completed_at.timestamp() * 1000)

    # Load decrypted core memory.
    from app.core.memory_middleware import MemoryMiddleware  # noqa: PLC0415

    memory_dict: dict[str, str] = {}
    try:
        mw = MemoryMiddleware(db)
        blocks = await mw.list_core_blocks(user_id)
        memory_dict = {b["label"]: b["value"] for b in blocks}
    except Exception:
        # Non-critical — keep serving with empty memory, but leave a trace
        # instead of silently discarding the failure.
        logging.getLogger(__name__).warning(
            "Failed to load core memory for user %s", user_id, exc_info=True
        )

    return UserProfile(
        id=user_id,
        email=email,
        name=user_row.name if user_row else None,
        surname=user_row.surname if user_row else None,
        avatar_url=user_row.avatar_url if user_row else None,
        has_password=bool(user_row.password_hash) if user_row else False,
        tier=tier,
        onboarding_completed_at=onboarding_ms,
        memory=memory_dict,
    )  # type: ignore[arg-type]
--- a/app/api/middleware/rate_limit.py
+++ b/app/api/middleware/rate_limit.py
@@ -0,0 +1,129 @@
 """Tier-aware rate limiting middleware.
 Uses a per-user sliding-window counter (in-process, no Redis required).
 The ``slowapi`` Limiter is also exported for optional route-level decoration.
 Limits (requests per minute):
  - free:  20
  - pro:   60
  - power: 120
  - team:  200
 Exempt paths bypass the limiter entirely:
  - POST /api/v1/auth/register
  - POST /api/v1/auth/login
  - POST /api/v1/billing/webhook
  - GET  /api/v1/health
 """
 from __future__ import annotations
 import json
 import time
 from collections import defaultdict
 from fastapi import Request, Response
 from jose import JWTError, jwt
 from slowapi import Limiter
 from slowapi.util import get_remote_address
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.types import ASGIApp
 from app.config.settings import settings
 _TIER_LIMITS: dict[str, int] = {
    "free": 20,
    "pro": 60,
    "power": 120,
    "team": 200,
 }
 _EXEMPT_PATHS: frozenset[str] = frozenset(
    {
        "/api/v1/auth/register",
        "/api/v1/auth/login",
        "/api/v1/billing/webhook",
        "/api/v1/health",
    }
 )
 def _get_user_id_from_jwt(request: Request) -> str:
    """Return the rate-limit key for *request*.

    The key is the ``sub`` claim of a decodable Bearer JWT; when the header is
    missing, the token does not decode, or ``sub`` is empty, the remote
    address is used instead.
    """
    header = request.headers.get("Authorization", "")
    bearer = header.removeprefix("Bearer ").strip()
    if bearer:
        try:
            claims = jwt.decode(
                bearer, settings.JWT_SECRET, algorithms=[settings.JWT_ALGORITHM]
            )
        except JWTError:
            pass  # fall through to the address-based key
        else:
            subject = claims.get("sub")
            if subject:
                return subject
    return get_remote_address(request)
 # Exported Limiter instance — available for optional route-level decoration.
 # Requests are keyed by _get_user_id_from_jwt (JWT ``sub``, else remote address).
 limiter = Limiter(key_func=_get_user_id_from_jwt)
 class TierRateLimitMiddleware(BaseHTTPMiddleware):
    """Global per-user sliding-window rate limiter for non-exempt routes.

    Every user with a decodable JWT gets a 60-second window whose size is
    taken from ``_TIER_LIMITS`` using the token's ``tier`` claim (unknown
    tiers use the ``free`` limit).  Requests without a usable token are
    forwarded untouched so the auth dependency can reject them.

    NOTE(review): the tier comes from the JWT claim here, not from the live
    subscription row — confirm that is acceptable for rate limiting.
    """

    def __init__(self, app: ASGIApp) -> None:
        super().__init__(app)
        # user_id → request times inside the current window, recorded as
        # time.monotonic() readings (not wall-clock/epoch seconds).
        self._window: dict[str, list[float]] = defaultdict(list)

    async def dispatch(self, request: Request, call_next) -> Response:  # type: ignore[override]
        """Forward the request, or answer 429 when the caller's window is full."""
        if request.url.path in _EXEMPT_PATHS:
            return await call_next(request)

        # No token, or an undecodable one: let the auth dependency deal with it.
        bearer = request.headers.get("Authorization", "").removeprefix("Bearer ").strip()
        if not bearer:
            return await call_next(request)
        try:
            claims = jwt.decode(
                bearer, settings.JWT_SECRET, algorithms=[settings.JWT_ALGORITHM]
            )
            user_id: str = claims.get("sub") or get_remote_address(request)
            tier: str = claims.get("tier", "free")
        except JWTError:
            return await call_next(request)

        limit = _TIER_LIMITS.get(tier, _TIER_LIMITS["free"])
        now = time.monotonic()
        cutoff = now - 60.0

        # Keep only the requests that are still inside the 60-second window.
        recent = [stamp for stamp in self._window[user_id] if stamp > cutoff]
        if len(recent) < limit:
            recent.append(now)
            self._window[user_id] = recent
            return await call_next(request)

        # Window is full: tell the client when the oldest entry will expire.
        retry_after = max(1, int(60 - (now - min(recent))))
        payload = {
            "detail": (
                f"Rate limit exceeded ({limit} req/min for {tier} tier). "
                f"Retry in {retry_after}s."
            )
        }
        return Response(
            content=json.dumps(payload),
            status_code=429,
            headers={
                "Retry-After": str(retry_after),
                "Content-Type": "application/json",
            },
        )
--- a/app/api/middleware/sanitizer.py
+++ b/app/api/middleware/sanitizer.py
@@ -0,0 +1,138 @@
 """Response sanitizer middleware.
 Scans JSON responses from the /api/v1/chat endpoint and strips any fragments
 that could reveal server-side prompt IP:
  - System prompt openers ("You are a/an/the …")
  - Agent routing metadata ("Available agents:", "intent classifier", …)
  - LangChain tool schema fragments (``"type": "function"``)
  - Internal reasoning markers (<thinking>, <reasoning>, [INST], …)
  - Exact-match known prompt fingerprints
 The middleware only activates for paths under /api/v1/chat.
 Any sanitisation event is logged as a WARNING with the request path and the
 names of the fields that were modified.
 """
 from __future__ import annotations
 import json
 import logging
 import re
 from fastapi import Request, Response
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.types import ASGIApp
 # Module-level logger; sanitisation events are reported through it as warnings.
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Detection patterns — order matters: fingerprints checked first (exact),
 # then compiled regexes.
 # ---------------------------------------------------------------------------
 # Exact, case-sensitive substrings.  If any one of them occurs in a string,
 # _sanitize_text replaces the WHOLE string with "[REDACTED]".
 _FINGERPRINTS: tuple[str, ...] = (
    "You are an intent classifier",
    "Respond with just the agent name",
    "Summarize these agent results",
    "Available agents:",
    "route to:",
 )
 # Regexes applied in order by _sanitize_text; each match (only the matched
 # span, not the whole string) is replaced with "[REDACTED]".
 _PATTERNS: tuple[re.Pattern[str], ...] = (
    # System-prompt opener plus up to 200 following characters (DOTALL, so
    # the span may cross newlines).
    re.compile(r"You are (a|an|the)\b.{0,200}", re.IGNORECASE | re.DOTALL),
    re.compile(r"Available agents\s*:", re.IGNORECASE),
    re.compile(r"\bintent classifier\b", re.IGNORECASE),
    re.compile(r'"type"\s*:\s*"function"'),           # LangChain tool schema
    # NOTE(review): matches the opening tag only; closing tags and the text
    # between the tags are left in place.
    re.compile(r"<(thinking|reasoning|system|prompt)>", re.IGNORECASE),
    re.compile(r"\[INST\]|\[/INST\]"),                # Llama instruct markers
    re.compile(r"route\s+to\s*:", re.IGNORECASE),
    # "prompt_template:" followed by a quote and at least 10 more characters.
    re.compile(r"prompt_template\s*:\s*['\"].{10,}", re.IGNORECASE),
 )
 def _sanitize_text(text: str) -> tuple[str, bool]:
    """Redact prompt fragments found in *text*.

    A string containing any entry of ``_FINGERPRINTS`` is replaced wholesale
    by ``[REDACTED]``.  Otherwise every ``_PATTERNS`` regex is applied in
    order and each match is replaced by ``[REDACTED]``.

    Returns ``(cleaned_text, was_changed)``.
    """
    # Exact phrases first: one hit is enough to drop the entire string.
    if any(marker in text for marker in _FINGERPRINTS):
        return "[REDACTED]", True

    total_hits = 0
    for regex in _PATTERNS:
        # Later patterns see the output of earlier ones.
        text, hits = regex.subn("[REDACTED]", text)
        total_hits += hits
    return text, total_hits > 0
 class SanitizerMiddleware(BaseHTTPMiddleware):
    """Strip prompt IP from /api/v1/chat JSON responses.

    Only top-level string fields of a JSON object body are scanned.  Bodies
    that are not JSON objects, and JSON bodies in which nothing was redacted,
    are returned with their original bytes.
    """

    def __init__(self, app: ASGIApp) -> None:
        super().__init__(app)

    @staticmethod
    def _passthrough(response: Response, body_bytes: bytes) -> Response:
        """Rebuild *response* around the already-consumed, unmodified body."""
        return Response(
            content=body_bytes,
            status_code=response.status_code,
            headers=dict(response.headers),
            media_type=response.media_type,
        )

    async def dispatch(self, request: Request, call_next) -> Response:  # type: ignore[override]
        """Run the request and sanitise the response when it targets the chat API."""
        response: Response = await call_next(request)

        # Only process chat endpoint responses.
        if not request.url.path.startswith("/api/v1/chat"):
            return response

        # Server-sent event streams are never JSON documents; forward them
        # untouched instead of buffering the whole stream just to pass it on.
        content_type = response.headers.get("content-type", "")
        if content_type.lower().startswith("text/event-stream"):
            return response

        # Read body — collect streaming chunks and join once (linear time).
        chunks: list[bytes] = []
        async for chunk in response.body_iterator:
            chunks.append(chunk if isinstance(chunk, bytes) else chunk.encode())
        body_bytes = b"".join(chunks)

        # Skip non-JSON bodies (shouldn't happen on /chat, but be safe).
        try:
            body = json.loads(body_bytes.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            return self._passthrough(response, body_bytes)
        if not isinstance(body, dict):
            return self._passthrough(response, body_bytes)

        # Walk top-level string fields and sanitise.
        sanitised_fields: list[str] = []
        for key, value in body.items():
            if isinstance(value, str):
                cleaned, changed = _sanitize_text(value)
                if changed:
                    body[key] = cleaned
                    sanitised_fields.append(key)

        # Nothing redacted: hand back the original bytes rather than a
        # re-serialised copy.
        if not sanitised_fields:
            return self._passthrough(response, body_bytes)

        logger.warning(
            "Sanitizer redacted prompt fragments",
            extra={
                "path": request.url.path,
                "fields": sanitised_fields,
            },
        )
        new_body = json.dumps(body).encode("utf-8")
        headers = dict(response.headers)
        # The body length changed, so the upstream content-length is stale.
        headers["content-length"] = str(len(new_body))
        return Response(
            content=new_body,
            status_code=response.status_code,
            headers=headers,
            media_type="application/json",
        )
--- a/services/billing/app/init.py
+++ b/services/billing/app/init.py
--- a/app/api/routes/auth.py
+++ b/app/api/routes/auth.py
@@ -0,0 +1,795 @@
 """Auth routes: register, login, refresh, me, OAuth social login, onboarding.
 Users and refresh tokens are persisted in PostgreSQL (users + refresh_tokens
 tables).  Passwords are hashed with bcrypt; refresh tokens are stored as
 SHA-256 hashes so plaintext never reaches the DB.
 OAuth (Google):
  GET  /auth/oauth/{provider}/authorize  — returns consent-screen URL + state
  POST /auth/oauth/{provider}/callback   — exchanges code, issues JWT tokens
 """
 from __future__ import annotations
 import hashlib
 import json
 import time
 import urllib.parse
 import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Literal
 import bcrypt
 from cryptography.fernet import Fernet
 from fastapi import APIRouter, Depends, HTTPException, status
 from fastapi.responses import RedirectResponse
 from jose import jwt
 from pydantic import BaseModel, Field
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.api.deps import get_current_user
 from app.auth.oauth_providers import GoogleOAuthProvider, generate_pkce_pair
 from app.config.settings import settings
 from app.core.llm import get_llm
 from app.core.memory_middleware import MemoryMiddleware
 from app.db import get_session
 from app.models import OAuthAccount, RefreshToken, User
 from app.schemas import AuthTokens, UserProfile
 # Router for every route in this module; paths are relative to the /auth prefix.
 router = APIRouter(prefix="/auth", tags=["auth"])
 # ── OAuth provider registry ───────────────────────────────────────────
 def _get_google_provider() -> GoogleOAuthProvider:
    """Build the Google OAuth provider from settings.

    Raises:
        HTTPException: 503 when the Google client id or secret is not set.
    """
    client_id = settings.GOOGLE_AUTH_CLIENT_ID
    client_secret = settings.GOOGLE_AUTH_CLIENT_SECRET
    if client_id and client_secret:
        return GoogleOAuthProvider(
            client_id=client_id,
            client_secret=client_secret,
            redirect_uri=settings.OAUTH_REDIRECT_URI,
        )
    raise HTTPException(
        status.HTTP_503_SERVICE_UNAVAILABLE,
        "Google login is not configured on this server",
    )
 # Provider name → zero-argument factory; the factory is called per request,
 # so missing configuration surfaces as an HTTP error at that point.
 _PROVIDERS = {"google": _get_google_provider}
 # In-memory state store: state → (code_verifier, expires_at_epoch_s)
 # Entries are written by oauth_authorize and popped by oauth_callback.
 # Production note: replace with Redis for multi-process deployments.
 _pending_states: dict[str, tuple[str, float]] = {}
 _STATE_TTL_SECONDS = 600  # 10 minutes
 # ── Internal helpers ─────────────────────────────────────────────────
 def _hash_password(password: str) -> str:
    """Return the bcrypt hash of *password* (freshly salted) as text."""
    raw = password.encode()
    salt = bcrypt.gensalt()
    return bcrypt.hashpw(raw, salt).decode()
 def _verify_password(password: str, hashed: str) -> bool:
    return bcrypt.checkpw(password.encode(), hashed.encode())
 def _hash_token(plain_token: str) -> str:
    """SHA-256 of the plain refresh token string."""
    return hashlib.sha256(plain_token.encode()).hexdigest()
 def _make_access_token(user_id: str, email: str, tier: str) -> tuple[str, int]:
    """Sign an access JWT for the user.

    Returns ``(token, expires_at_ms)``; the expiry is in epoch milliseconds
    for the client, while the ``exp``/``iat`` claims are in epoch seconds.
    """
    issued_at = int(time.time())
    expires_at = issued_at + settings.JWT_ACCESS_TOKEN_EXPIRE_MINUTES * 60
    claims = {
        "sub": user_id,
        "email": email,
        "tier": tier,
        "exp": expires_at,
        "iat": issued_at,
    }
    signed = jwt.encode(claims, settings.JWT_SECRET, algorithm=settings.JWT_ALGORITHM)
    return signed, expires_at * 1000
 # ── Request bodies ────────────────────────────────────────────────────
 # Body of POST /auth/register; name and surname are optional.
 class _RegisterRequest(BaseModel):
    email: str
    password: str
    name: str | None = None
    surname: str | None = None
 # Body of POST /auth/login.
 class _LoginRequest(BaseModel):
    email: str
    password: str
 # Body of POST /auth/refresh; carries the plain (unhashed) refresh token.
 class _RefreshRequest(BaseModel):
    refresh_token: str
 # ── Routes ────────────────────────────────────────────────────────────
@router.post("/register", response_model=AuthTokens, status_code=status.HTTP_201_CREATED)
 async def register(
    body: _RegisterRequest,
    db: AsyncSession = Depends(get_session),
 ) -> AuthTokens:
    """Create a new account and return JWT tokens."""
    existing = await db.execute(select(User).where(User.email == body.email))
    if existing.scalar_one_or_none() is not None:
        raise HTTPException(status.HTTP_409_CONFLICT, "Email already registered")
    user = User(
        id=str(uuid.uuid4()),
        email=body.email,
        name=body.name,
        surname=body.surname,
        password_hash=_hash_password(body.password),
        tier="free",
        encryption_key=Fernet.generate_key().decode(),
    )
    db.add(user)
    await db.flush()  # get user.id without committing
    plain_token = str(uuid.uuid4())
    expires_at = datetime.now(timezone.utc) + timedelta(
        days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS
    )
    rt = RefreshToken(
        user_id=user.id,
        token_hash=_hash_token(plain_token),
        expires_at=expires_at,
    )
    db.add(rt)
    await db.commit()
    access_token, expires_at_ms = _make_access_token(user.id, user.email, user.tier)
    return AuthTokens(
        access_token=access_token,
        refresh_token=plain_token,
        expires_at=expires_at_ms,
    )
@router.post("/login", response_model=AuthTokens)
 async def login(
    body: _LoginRequest,
    db: AsyncSession = Depends(get_session),
 ) -> AuthTokens:
    """Validate credentials and return JWT tokens."""
    result = await db.execute(select(User).where(User.email == body.email))
    user = result.scalar_one_or_none()
    if user is None or not _verify_password(body.password, user.password_hash):
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid credentials")
    plain_token = str(uuid.uuid4())
    expires_at = datetime.now(timezone.utc) + timedelta(
        days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS
    )
    rt = RefreshToken(
        user_id=user.id,
        token_hash=_hash_token(plain_token),
        expires_at=expires_at,
    )
    db.add(rt)
    await db.commit()
    access_token, expires_at_ms = _make_access_token(user.id, user.email, user.tier)
    return AuthTokens(
        access_token=access_token,
        refresh_token=plain_token,
        expires_at=expires_at_ms,
    )
@router.post("/refresh", response_model=AuthTokens)
 async def refresh(
    body: _RefreshRequest,
    db: AsyncSession = Depends(get_session),
 ) -> AuthTokens:
    """Rotate a refresh token and return a new token pair."""
    token_hash = _hash_token(body.refresh_token)
    result = await db.execute(
        select(RefreshToken).where(RefreshToken.token_hash == token_hash)
    )
    rt = result.scalar_one_or_none()
    now = datetime.now(timezone.utc)
    if rt is None or rt.expires_at.replace(tzinfo=timezone.utc) < now:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired refresh token")
    # Rotate: delete old token, issue new one.
    await db.delete(rt)
    user_result = await db.execute(select(User).where(User.id == rt.user_id))
    user = user_result.scalar_one_or_none()
    if user is None:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")
    plain_token = str(uuid.uuid4())
    new_expires = now + timedelta(days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS)
    new_rt = RefreshToken(
        user_id=user.id,
        token_hash=_hash_token(plain_token),
        expires_at=new_expires,
    )
    db.add(new_rt)
    await db.commit()
    access_token, expires_at_ms = _make_access_token(user.id, user.email, user.tier)
    return AuthTokens(
        access_token=access_token,
        refresh_token=plain_token,
        expires_at=expires_at_ms,
    )
 # Body of PUT /auth/me; a field left as None is not changed by update_profile.
 class _UpdateProfileRequest(BaseModel):
    name: str | None = None
    surname: str | None = None
@router.get("/me", response_model=UserProfile)
 async def me(current_user: UserProfile = Depends(get_current_user)) -> UserProfile:
    """Return the profile for the authenticated user."""
    return current_user
@router.put("/me", response_model=UserProfile)
 async def update_profile(
    body: _UpdateProfileRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> UserProfile:
    """Update the authenticated user's name and surname."""
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    if body.name is not None:
        user.name = body.name
    if body.surname is not None:
        user.surname = body.surname
    await db.commit()
    await db.refresh(user)
    return UserProfile(
        id=user.id,
        email=user.email,
        name=user.name,
        surname=user.surname,
        avatar_url=user.avatar_url,
        tier=current_user.tier,
    )
 # ── OAuth helpers ─────────────────────────────────────────────────────
 async def _issue_refresh_token(user: User, db: AsyncSession) -> tuple[str, AuthTokens]:
    """Queue a new refresh-token row for *user* and build the token pair.

    The row is only added to the session; committing is left to the caller.

    Returns:
        ``(plain_token, AuthTokens)`` — the plain refresh token is also
        carried inside the ``AuthTokens``.
    """
    plain_token = str(uuid.uuid4())
    valid_until = datetime.now(timezone.utc) + timedelta(
        days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS
    )
    # Only the SHA-256 hash of the token is stored.
    db.add(
        RefreshToken(
            user_id=user.id,
            token_hash=_hash_token(plain_token),
            expires_at=valid_until,
        )
    )
    access_token, expires_at_ms = _make_access_token(user.id, user.email, user.tier)
    tokens = AuthTokens(
        access_token=access_token,
        refresh_token=plain_token,
        expires_at=expires_at_ms,
    )
    return plain_token, tokens
 # ── OAuth request/response schemas ───────────────────────────────────
 # Response of GET /auth/oauth/{provider}/authorize: the consent-screen URL
 # and the state value registered for this flow.
 class _OAuthAuthorizeResponse(BaseModel):
    url: str
    state: str
 # Body of POST /auth/oauth/{provider}/callback: the authorization code and
 # the state value returned by the provider redirect.
 class _OAuthCallbackRequest(BaseModel):
    code: str
    state: str
 # ── OAuth routes ──────────────────────────────────────────────────────
@router.get(
    "/oauth/{provider}/web-callback",
    summary="Web-facing OAuth redirect — bounces to the adiuvai:// deep link",
    include_in_schema=False,
 )
 async def oauth_web_callback(
    provider: Literal["google"],
    code: str,
    state: str,
 ) -> RedirectResponse:
    """Google redirects here after user consent.
    This endpoint immediately redirects to the Electron deep-link URI so the
    desktop app receives the authorization code.  It is intentionally simple —
    no state validation here (the Electron app + backend callback do that).
    Registered in Google Cloud Console as:
      http://localhost:8000/api/v1/auth/oauth/google/web-callback  (dev)
      https://api.adiuvai.com/api/v1/auth/oauth/google/web-callback  (prod)
    """
    params = urllib.parse.urlencode({"code": code, "state": state, "provider": provider})
    deep_link = f"adiuvai://oauth/callback?{params}"
    return RedirectResponse(url=deep_link, status_code=302)
@router.get(
    "/oauth/{provider}/authorize",
    response_model=_OAuthAuthorizeResponse,
    summary="Start OAuth flow — returns the provider consent-screen URL",
 )
 async def oauth_authorize(
    provider: Literal["google"],
 ) -> _OAuthAuthorizeResponse:
    """Generate a PKCE state + code_challenge and return the authorization URL.
    The client opens this URL in the system browser.  After the user grants
    consent, the provider redirects to the deep-link URI (adiuvai://oauth/callback)
    with ``code`` and ``state`` query params.  The client then calls
    ``POST /auth/oauth/{provider}/callback`` with those values.
    """
    provider_factory = _PROVIDERS.get(provider)
    if provider_factory is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, f"Unknown provider: {provider}")
    oauth_provider = provider_factory()
    state = str(uuid.uuid4())
    code_verifier, code_challenge = generate_pkce_pair()
    # Purge expired states to prevent unbounded growth.
    now = time.time()
    expired = [s for s, (_, exp) in _pending_states.items() if exp < now]
    for s in expired:
        del _pending_states[s]
    _pending_states[state] = (code_verifier, now + _STATE_TTL_SECONDS)
    url = oauth_provider.get_authorization_url(state=state, code_challenge=code_challenge)
    return _OAuthAuthorizeResponse(url=url, state=state)
@router.post(
    "/oauth/{provider}/callback",
    response_model=AuthTokens,
    summary="Complete OAuth flow — exchange code and issue JWT tokens",
 )
 async def oauth_callback(
    provider: Literal["google"],
    body: _OAuthCallbackRequest,
    db: AsyncSession = Depends(get_session),
 ) -> AuthTokens:
    """Validate state, exchange the authorization code, and sign in (or register) the user.
    Resolution order:
      1. ``oauth_accounts`` row match → existing user, log in.
      2. Email match + ``email_verified=True`` → link OAuth account to existing user.
      3. No match → create new user (password_hash=None, avatar from provider).
    """
    provider_factory = _PROVIDERS.get(provider)
    if provider_factory is None:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, f"Unknown provider: {provider}")
    # Validate state (CSRF protection).
    now = time.time()
    entry = _pending_states.pop(body.state, None)
    if entry is None or entry[1] < now:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired OAuth state")
    code_verifier, _ = entry
    oauth_provider = provider_factory()
    # Exchange code for tokens.
    try:
        token_data = await oauth_provider.exchange_code(
            code=body.code,
            code_verifier=code_verifier,
            redirect_uri=settings.OAUTH_REDIRECT_URI,
        )
    except Exception:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST, "Failed to exchange authorization code"
        )
    access_token_google = token_data.get("access_token")
    if not access_token_google:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "No access token in provider response")
    # Fetch user identity.
    try:
        userinfo = await oauth_provider.get_userinfo(access_token_google)
    except Exception:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "Failed to fetch user info from provider")
    # ── Resolution order ──────────────────────────────────────────────
    # 1. Existing OAuth link?
    oauth_result = await db.execute(
        select(OAuthAccount).where(
            OAuthAccount.provider == provider,
            OAuthAccount.provider_user_id == userinfo.provider_user_id,
        )
    )
    oauth_account = oauth_result.scalar_one_or_none()
    if oauth_account is not None:
        user_result = await db.execute(select(User).where(User.id == oauth_account.user_id))
        user = user_result.scalar_one()
        # Backfill avatar if the user doesn't have one yet.
        if user.avatar_url is None and userinfo.avatar_url:
            user.avatar_url = userinfo.avatar_url
            await db.commit()
        plain_token, tokens = await _issue_refresh_token(user, db)
        await db.commit()
        return tokens
    # 2. Email match with a verified Google email → link accounts.
    if userinfo.email_verified:
        email_result = await db.execute(select(User).where(User.email == userinfo.email))
        existing_user = email_result.scalar_one_or_none()
        if existing_user is not None:
            new_link = OAuthAccount(
                user_id=existing_user.id,
                provider=provider,
                provider_user_id=userinfo.provider_user_id,
                provider_email=userinfo.email,
            )
            db.add(new_link)
            if existing_user.avatar_url is None and userinfo.avatar_url:
                existing_user.avatar_url = userinfo.avatar_url
            plain_token, tokens = await _issue_refresh_token(existing_user, db)
            await db.commit()
            return tokens
    # Guard: if the email is already taken but we couldn't auto-link (e.g.
    # email_verified=False), refuse with 409 instead of hitting a DB constraint.
    if not userinfo.email_verified:
        conflict = await db.execute(select(User).where(User.email == userinfo.email))
        if conflict.scalar_one_or_none() is not None:
            raise HTTPException(
                status.HTTP_409_CONFLICT,
                "An account with this email already exists. "
                "Please sign in with your password.",
            )
    # 3. New user — social-only account (no password).
    new_user = User(
        id=str(uuid.uuid4()),
        email=userinfo.email,
        name=userinfo.name,
        password_hash=None,
        avatar_url=userinfo.avatar_url,
        tier="free",
        encryption_key=Fernet.generate_key().decode(),
    )
    db.add(new_user)
    await db.flush()  # populate new_user.id
    new_oauth = OAuthAccount(
        user_id=new_user.id,
        provider=provider,
        provider_user_id=userinfo.provider_user_id,
        provider_email=userinfo.email,
    )
    db.add(new_oauth)
    plain_token, tokens = await _issue_refresh_token(new_user, db)
    await db.commit()
    return tokens
 # ── Onboarding helpers ────────────────────────────────────────────────
 async def _build_profile(user_id: str, email: str, db: AsyncSession) -> UserProfile:
    """Re-fetch and return a full ``UserProfile`` for *user_id*.

    The tier comes from the ``subscriptions`` table (default ``'power'`` when
    ``settings.ENV == "dev"``, else ``'free'``), the profile fields from the
    ``users`` row, and core memory is loaded best-effort (empty on failure).

    Args:
        user_id: Id of the user to load.
        email: Email to place on the profile (not re-read from the database).
        db: Async database session.
    """
    import logging  # noqa: PLC0415

    # The FastAPI dependency cannot be called directly from a route body, so
    # the same queries are run here.
    from app.models import Subscription  # noqa: PLC0415

    result = await db.execute(
        select(Subscription.tier).where(Subscription.user_id == user_id)
    )
    default_tier = "power" if settings.ENV == "dev" else "free"
    tier: str = result.scalar_one_or_none() or default_tier

    user_result = await db.execute(
        select(
            User.name, User.surname, User.avatar_url, User.onboarding_completed_at,
            User.password_hash,
        ).where(User.id == user_id)
    )
    user_row = user_result.one_or_none()

    # Convert onboarding_completed_at to epoch ms (int) or None.
    onboarding_ms: int | None = None
    if user_row and user_row.onboarding_completed_at is not None:
        onboarding_ms = int(user_row.onboarding_completed_at.timestamp() * 1000)

    memory_dict: dict[str, str] = {}
    try:
        mw = MemoryMiddleware(db)
        blocks = await mw.list_core_blocks(user_id)
        memory_dict = {b["label"]: b["value"] for b in blocks}
    except Exception:
        # Non-critical — return empty memory, but record the failure instead
        # of discarding it silently.
        logging.getLogger(__name__).warning(
            "Failed to load core memory for user %s", user_id, exc_info=True
        )

    return UserProfile(
        id=user_id,
        email=email,
        name=user_row.name if user_row else None,
        surname=user_row.surname if user_row else None,
        avatar_url=user_row.avatar_url if user_row else None,
        has_password=bool(user_row.password_hash) if user_row else False,
        tier=tier,
        onboarding_completed_at=onboarding_ms,
        memory=memory_dict,
    )
 # ── Onboarding routes ────────────────────────────────────────────────
 # Body of PUT /auth/me/memory: core-memory key/value pairs to write, plus a
 # flag asking the route to stamp onboarding as completed.
 class _UpdateMemoryRequest(BaseModel):
    memory: dict[str, str] = Field(default_factory=dict)
    mark_onboarded: bool = False
@router.put("/me/memory", response_model=UserProfile)
 async def update_memory(
    body: _UpdateMemoryRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> UserProfile:
    """Update core memory key/value pairs and optionally mark onboarding complete."""
    mw = MemoryMiddleware(db)
    for key, value in body.memory.items():
        await mw.update_core(current_user.id, key, value)
    if body.mark_onboarded:
        result = await db.execute(select(User).where(User.id == current_user.id))
        user = result.scalar_one()
        user.onboarding_completed_at = datetime.now(timezone.utc)
        await db.commit()
    return await _build_profile(current_user.id, current_user.email, db)
@router.post("/me/onboarding/reset")
 async def reset_onboarding(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ):
    """Reset onboarding so the wizard runs again on next login."""
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    user.onboarding_completed_at = None
    await db.commit()
    return {"status": "reset"}
 # Request body for POST /onboarding/normalize.
 class _NormalizeRequest(BaseModel):
    # Free-text onboarding answers by key; serialized as JSON into the LLM prompt.
    inputs: dict[str, str]
 # Response body for POST /onboarding/normalize.
 class _NormalizeResponse(BaseModel):
    # Normalized labels by key; the handler returns the raw inputs here when the LLM call fails.
    normalized: dict[str, str]
@router.post("/onboarding/normalize", response_model=_NormalizeResponse)
 async def normalize_onboarding(
    body: _NormalizeRequest,
    current_user: UserProfile = Depends(get_current_user),
 ) -> _NormalizeResponse:
    """One-shot LLM normalization for free-text onboarding answers."""
    if not body.inputs:
        return _NormalizeResponse(normalized={})
    try:
        llm = get_llm(model="gpt-4o-mini", temperature=0)
        prompt = (
            "You normalize user onboarding answers into clean, ≤3-word canonical labels.\n"
            "Return a JSON object with the same keys and normalized values.\n"
            "Examples: 'i build websites' → 'Web Developer', 'tech-ish stuff' → 'Technology'\n"
            f"Input: {json.dumps(body.inputs)}"
        )
        response = await llm.ainvoke(
            [
                {"role": "system", "content": "You normalize user inputs. Return JSON only."},
                {"role": "user", "content": prompt},
            ],
        )
        normalized = json.loads(response.content)
        return _NormalizeResponse(normalized=normalized)
    except Exception:
        # LLM failure must never block onboarding — return inputs unchanged
        return _NormalizeResponse(normalized=body.inputs)
 # ── Password management ───────────────────────────────────────────────
 # Request body for PUT /me/password.
 class _ChangePasswordRequest(BaseModel):
    # Existing password; the handler verifies it against the stored hash (must be non-empty).
    current_password: str = Field(min_length=1)
    # Replacement password; at least 8 characters.
    new_password: str = Field(min_length=8)
@router.put("/me/password", status_code=status.HTTP_200_OK)
 async def change_password(
    body: _ChangePasswordRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, bool]:
    """Change the authenticated user's password.
    Requires the current password for verification.
    Returns 400 for social-only users (no password set).
    """
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    if user.password_hash is None:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST,
            "This account uses social login and has no password to change",
        )
    if not _verify_password(body.current_password, user.password_hash):
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "Current password is incorrect")
    user.password_hash = _hash_password(body.new_password)
    await db.commit()
    return {"ok": True}
 # ── OAuth account management ─────────────────────────────────────────
@router.get("/me/oauth-accounts", response_model=list[dict])
 async def list_oauth_accounts(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> list[dict]:
    """List all OAuth providers linked to the authenticated user."""
    result = await db.execute(
        select(OAuthAccount).where(OAuthAccount.user_id == current_user.id)
    )
    accounts = result.scalars().all()
    return [
        {
            "provider": a.provider,
            "provider_email": a.provider_email,
            "created_at": int(a.created_at.timestamp() * 1000),
        }
        for a in accounts
    ]
@router.delete("/me/oauth-accounts/{provider}", status_code=status.HTTP_200_OK)
 async def unlink_oauth_account(
    provider: str,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, bool]:
    """Unlink an OAuth provider from the authenticated user.
    Refuses if the user has no password and this is their only login method.
    """
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    oauth_result = await db.execute(
        select(OAuthAccount).where(
            OAuthAccount.user_id == current_user.id,
            OAuthAccount.provider == provider,
        )
    )
    account = oauth_result.scalar_one_or_none()
    if account is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, f"No linked {provider} account found")
    # Safety: don't let users lock themselves out.
    all_oauth = await db.execute(
        select(OAuthAccount).where(OAuthAccount.user_id == current_user.id)
    )
    oauth_count = len(all_oauth.scalars().all())
    if user.password_hash is None and oauth_count <= 1:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST,
            "Cannot unlink the only login method. Set a password first.",
        )
    await db.delete(account)
    await db.commit()
    return {"ok": True}
 # ── Avatar update ─────────────────────────────────────────────────────
 # Request body for PUT /me/avatar.
 class _UpdateAvatarRequest(BaseModel):
    # Non-empty avatar location; the handler additionally requires an
    # https://, http:// or data:image/ prefix.
    avatar_url: str = Field(min_length=1)
@router.put("/me/avatar", response_model=UserProfile)
 async def update_avatar(
    body: _UpdateAvatarRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> UserProfile:
    """Update the authenticated user's avatar URL.
    Accepts {"avatar_url": "https://..."} — the client uploads the image
    to its own storage and passes the resulting URL here.
    """
    if not body.avatar_url.startswith(("https://", "http://", "data:image/")):
        raise HTTPException(status.HTTP_400_BAD_REQUEST, "Invalid avatar URL")
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    user.avatar_url = body.avatar_url
    await db.commit()
    return await _build_profile(current_user.id, current_user.email, db)
 # ── Account deletion ─────────────────────────────────────────────────
@router.delete("/me", status_code=status.HTTP_200_OK)
 async def delete_account(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, bool]:
    """Permanently delete the authenticated user's account.
    Cascades: refresh tokens, OAuth accounts, subscription, and all memory
    rows are deleted via SQLAlchemy relationship cascades.  Stripe subscription
    is cancelled if active.
    """
    # Cancel Stripe subscription if present.
    try:
        from app.billing.stripe_service import stripe_service  # noqa: PLC0415
        await stripe_service.cancel_subscription(current_user.id, db)
    except HTTPException:
        pass  # No subscription — that's fine
    # Delete all memory rows (core, associative, episodic, proactive).
    try:
        from app.models import (  # noqa: PLC0415
            MemoryAssociative, MemoryCore, MemoryEpisodic, MemoryProactive,
        )
        for model in (MemoryCore, MemoryAssociative, MemoryEpisodic, MemoryProactive):
            await db.execute(
                model.__table__.delete().where(model.user_id == current_user.id)
            )
    except Exception:
        pass  # Non-critical — cascade on User will handle most
    # Delete the user row — cascades handle refresh_tokens, oauth_accounts, subscription.
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one()
    await db.delete(user)
    await db.commit()
    return {"ok": True}
--- a/app/api/routes/billing.py
+++ b/app/api/routes/billing.py
@@ -0,0 +1,132 @@
 """Billing routes: Stripe checkout, webhook, subscription management.
 Business logic lives in ``app.billing.stripe_service.StripeService``.
 The route layer handles HTTP concerns (request parsing, response shaping)
 and delegates everything else to the service singleton.
 """
 from __future__ import annotations
 from typing import Any
 from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
 from pydantic import BaseModel
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.api.deps import get_current_user
 from app.billing.stripe_service import stripe_service
 from app.db import get_session
 from app.schemas import BillingTier, UserProfile
 router = APIRouter(prefix="/billing", tags=["billing"])
 # ── Request bodies ─────────────────────────────────────────────────────
 # Request body for POST /billing/checkout.
 class _CheckoutRequest(BaseModel):
    # Target tier; passed to stripe_service.create_checkout_session.
    tier: BillingTier
 # ── Routes ─────────────────────────────────────────────────────────────
@router.post("/checkout", response_model=dict)
 async def create_checkout(
    body: _CheckoutRequest,
    current_user: UserProfile = Depends(get_current_user),
 ) -> dict[str, str]:
    """Create a Stripe checkout session for a tier upgrade.
    Returns a stub URL when ``STRIPE_SECRET_KEY`` is not configured.
    """
    url = stripe_service.create_checkout_session(current_user.id, body.tier)
    return {"checkout_url": url}
@router.post("/webhook", response_model=dict)
 async def stripe_webhook(
    request: Request,
    stripe_signature: str = Header(default="", alias="Stripe-Signature"),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, bool]:
    """Handle Stripe webhook events.
    No JWT auth — authenticated via Stripe signature verification instead.
    Returns 200 immediately when Stripe is not configured (local dev).
    """
    payload = await request.body()
    await stripe_service.handle_webhook(payload, stripe_signature, db)
    return {"ok": True}
@router.get("/subscription", response_model=dict)
 async def get_subscription(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, Any]:
    """Return the current subscription info for the authenticated user."""
    sub = await stripe_service.get_subscription(current_user.id, db)
    if sub is None:
        return {
            "tier": current_user.tier,
            "status": "free",
            "stripe_subscription_id": None,
            "current_period_end": None,
        }
    return sub
@router.delete("/subscription", response_model=dict, status_code=status.HTTP_200_OK)
 async def cancel_subscription(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, bool]:
    """Cancel the active subscription."""
    await stripe_service.cancel_subscription(current_user.id, db)
    return {"ok": True}
@router.get("/invoices", response_model=list[dict])
 async def list_invoices(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> list[dict[str, Any]]:
    """Return billing history (invoices) from Stripe.
    Returns an empty list when Stripe is not configured.
    """
    invoices = await stripe_service.list_invoices(current_user.id, db)
    return invoices
 # ── Quota check ────────────────────────────────────────────────────────
 from app.billing.quota import check_folder_quota, QuotaExceeded  # noqa: E402
 # Request body for POST /billing/quota/check.
 class QuotaCheckRequest(BaseModel):
    # Feature being checked; the handler accepts only "folder_index".
    feature: str
    # Number of files the client expects to index; forwarded to check_folder_quota.
    estimated_files: int
@router.post("/quota/check")
 async def quota_check(
    payload: QuotaCheckRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict:
    """Pre-flight folder quota check. 402 if tier limits would be exceeded."""
    if payload.feature != "folder_index":
        raise HTTPException(status_code=400, detail="Unknown feature")
    try:
        await check_folder_quota(
            user_id=current_user.id,
            tier=current_user.tier,
            estimated_files=payload.estimated_files,
            db=db,
        )
    except QuotaExceeded as exc:
        raise HTTPException(
            status_code=402,
            detail={"reason": exc.reason, "message": str(exc)},
        )
    return {"ok": True}
--- a/app/api/routes/chat.py
+++ b/app/api/routes/chat.py
@@ -0,0 +1,116 @@
 """Chat routes: POST /chat (REST fallback), POST /chat/brief (REST brief fallback) and POST /chat/embed (text → vector).
 WebSocket chat is handled by the unified device WS endpoint (/api/v1/ws/device).
 """
 from __future__ import annotations
 import uuid
 from typing import Literal
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from app.api.deps import get_current_user
 from app.core.brief_agent import run_home_brief, run_project_brief
 from app.core.deep_agent import run_home
 from app.core.llm import embed
 from app.core.memory_middleware import MemoryMiddleware
 from app.db import async_session
 from app.schemas import ChatRequest, UserProfile
 router = APIRouter(prefix="/chat", tags=["chat"])
 # ── Embed helpers ─────────────────────────────────────────────────────────
 # Request body for POST /chat/embed.
 class _EmbedRequest(BaseModel):
    # Text to embed; passed unchanged to app.core.llm.embed.
    text: str
 # Response body for POST /chat/embed.
 class _EmbedResponse(BaseModel):
    # Embedding returned by app.core.llm.embed for the request text.
    vector: list[float]
 # ── Endpoints ─────────────────────────────────────────────────────────────
@router.post("")
 async def chat(
    body: ChatRequest,
    current_user: UserProfile = Depends(get_current_user),
 ) -> JSONResponse:
    """REST fallback for home chat when websocket streaming is unavailable."""
    response = await run_home(
        user_id=current_user.id,
        message=body.message,
        context=body.context.model_dump(),
    )
    return JSONResponse(content={"response": response})
 # Request body for POST /chat/brief.
 class _BriefRequest(BaseModel):
    # Which brief to produce.
    mode: Literal["home", "project"]
    # Required by the handler (and must parse as a UUID) when mode == "project".
    project_id: str | None = None
 # Response body for POST /chat/brief.
 class _BriefResponse(BaseModel):
    # Concatenation of every "token" chunk emitted by the brief stream.
    response: str
@router.post("/brief", response_model=_BriefResponse)
 async def brief(
    body: _BriefRequest,
    current_user: UserProfile = Depends(get_current_user),
 ) -> _BriefResponse:
    """REST fallback for brief when the device WebSocket is not ready."""
    if body.mode == "project":
        if not body.project_id:
            raise HTTPException(status_code=422, detail="project_id required for project mode")
        try:
            uuid.UUID(body.project_id)
        except ValueError:
            raise HTTPException(status_code=422, detail="project_id must be a valid UUID")
    request_id = str(uuid.uuid4())
    async with async_session() as db:
        memory = MemoryMiddleware(db)
        memory_context = await memory.enrich_context(
            current_user.id,
            "",
            trace_id=request_id,
            session_id=request_id,
        )
    context: dict = {
        "_debug": {"request_id": request_id, "user_id": current_user.id},
        **memory_context,
    }
    chunks: list[str] = []
    if body.mode == "project":
        stream = run_project_brief(current_user.id, body.project_id, context)  # type: ignore[arg-type]
    else:
        stream = run_home_brief(current_user.id, context)
    async for event_type, data in stream:
        if event_type == "token" and data:
            chunks.append(str(data))
    return _BriefResponse(response="".join(chunks))
@router.post("/embed", response_model=_EmbedResponse)
 async def embed_text(
    body: _EmbedRequest,
    current_user: UserProfile = Depends(get_current_user),
 ) -> _EmbedResponse:
    """Generate a 1536-dim embedding vector for the given text.
    Uses ``text-embedding-3-small`` via OpenAI.  Auth required (JWT).
    Used by Electron (vectordb.ts) for local note search.
    """
    vector = await embed(body.text)
    return _EmbedResponse(vector=vector)
--- a/app/api/routes/device_ws.py
+++ b/app/api/routes/device_ws.py
@@ -0,0 +1,864 @@
 """Device WebSocket endpoint.
 Persistent connection from Electron devices to the backend.
  WS  /api/v1/ws/device?token=<jwt>
 Auth: JWT passed as ``?token=`` query parameter (Bearer header is not
 available during the WebSocket handshake).
 Protocol:
  1. Client connects → JWT validated → connection accepted.
  2. Client sends ``device_hello`` frame: ``{ type, device_id, scout_ids }``.
  3. Backend registers the connection in ``DeviceConnectionManager``.
  4. Session enters message dispatch loop + heartbeat.
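 Incoming frame dispatch:
  - ``tool_result``              → resolves a pending tool-call Future.
  - ``home_request``             → streams a home chat response.
  - ``brief_request``            → streams a home or project brief.
  - ``task_brief_request``       → dispatched to the task-brief handler.
  - ``journey_start``            → starts a guided setup journey session.
  - ``journey_message``          → continues a journey conversation.
  - ``index_session_start`` / ``index_file_batch`` / ``index_session_cancel``
                                 → folder-index session frames.
  - ``contextual_request``       → runs the contextual agent and streams its output.
  - ``contextual_scope_update``  → records a navigation change; no LLM call.
  - ``scout_proposal_ack``       → acknowledges a delivered scout proposal.
  - ``pong``                     → heartbeat acknowledgement (no action taken).
  - unknown types                → logged, ignored.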
 Outgoing heartbeat: ``{ "type": "ping" }`` every 30 s.
 On disconnect:
  - Unregisters from DeviceConnectionManager.
  - Marks all in-progress AgentRunLog rows for this user as ``error``
    with message "device disconnected".
 """
 from __future__ import annotations
 import asyncio
 import json
 import logging
 from uuid import uuid4
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 from jose import JWTError, jwt
 from sqlalchemy import update
 from app.api.routes.scout_setup import handle_journey_message, handle_journey_start
 from app.config.settings import settings
 from app.scouts.engine import ScoutEngine
 from app.core.scout_runner import trigger_pending_runs
 from app.core.scout_session_buffer import session_buffer
 from app.core.brief_agent import run_home_brief, run_project_brief
 from app.core.deep_agent import run_contextual_stream, run_home_stream, run_task_brief_research_stream
 from app.core.output_formatter import extract_canvas_block
 from app.core.device_manager import device_manager
 from app.core.memory_middleware import MemoryMiddleware
 from app.core.output_formatter import StreamFormatter
 from app.core.ws_context import clear_client_executor, set_client_executor
 from app.db import async_session
 from app.models import ScoutRunLog
 from app.schemas import WsFrameType, WsStreamEnd
 from app.schemas.contextual import ContextualScope, render_scope_block
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/ws", tags=["device-ws"])
 # ── v7 folder index session state ─────────────────────────────────────
 # Keyed by sessionId; value: { user_id, project_id, processed, total, cancelled }
 _index_sessions: dict[str, dict] = {}
 _HEARTBEAT_INTERVAL = 30  # seconds
 _PONG_TIMEOUT = 10  # seconds — grace window after a ping
@router.websocket("/device")
 async def device_ws(websocket: WebSocket) -> None:
    """Persistent WebSocket endpoint for Electron device connections.
    Authentication is via ``?token=<jwt>`` query parameter.
    """
    # ── 1. Authenticate before accepting ─────────────────────────────
    token = websocket.query_params.get("token", "")
    try:
        payload = jwt.decode(
            token, settings.JWT_SECRET, algorithms=[settings.JWT_ALGORITHM]
        )
        user_id: str | None = payload.get("sub")
        if not user_id:
            raise JWTError("missing sub")
    except JWTError:
        await websocket.close(code=1008)  # Policy Violation
        return
    await websocket.accept()
    # ── 2. Await device_hello frame ───────────────────────────────────
    try:
        raw = await asyncio.wait_for(websocket.receive_text(), timeout=15.0)
    except (asyncio.TimeoutError, WebSocketDisconnect):
        await websocket.close(code=1008)
        return
    try:
        hello = json.loads(raw)
        if hello.get("type") != WsFrameType.device_hello:
            raise ValueError("expected device_hello as first frame")
        device_id: str = hello["device_id"]
        scout_ids: list[str] = hello.get("scout_ids", [])
    except (KeyError, ValueError, json.JSONDecodeError) as exc:
        logger.warning("device_ws: invalid device_hello from user=%s: %s", user_id, exc)
        await websocket.close(code=1008)
        return
    # ── 3. Register connection ────────────────────────────────────────
    device_manager.register(user_id, device_id, websocket)
    logger.info(
        "device_ws: connected user=%s device=%s scouts=%s",
        user_id,
        device_id,
        scout_ids,
    )
    # Trigger any overdue agent runs now that the device is connected.
    asyncio.create_task(trigger_pending_runs(user_id, device_id, device_manager))
    # Drain any queued scout proposals and deliver to the client (non-blocking).
    async def _deliver_pending_safe() -> None:
        import uuid as _uuid  # noqa: PLC0415
        try:
            await ScoutEngine().deliver_pending(_uuid.UUID(user_id), websocket)
        except Exception:
            logger.exception("scout deliver_pending failed for user %s", user_id)
    asyncio.create_task(_deliver_pending_safe())
    # ── 4. Concurrent message loop + heartbeat ────────────────────────
    try:
        await asyncio.gather(
            _message_loop(websocket, user_id),
            _heartbeat_loop(websocket),
        )
    except WebSocketDisconnect:
        pass
    except Exception as exc:
        logger.warning("device_ws: unhandled exception user=%s: %s", user_id, exc)
    finally:
        device_manager.unregister(user_id)
        logger.info("device_ws: disconnected user=%s device=%s", user_id, device_id)
        await _mark_runs_disconnected(user_id)
 # ── Message dispatch loop ─────────────────────────────────────────────
 # Strong references to in-flight frame-handler tasks.  The event loop itself
 # keeps only weak references, so an unreferenced task may be garbage-collected
 # before it finishes.
 _handler_tasks: set[asyncio.Task] = set()


 def _spawn_handler(coro) -> asyncio.Task:
    """Schedule *coro* as a task and keep it referenced until it completes."""
    task = asyncio.create_task(coro)
    _handler_tasks.add(task)
    task.add_done_callback(_handler_tasks.discard)
    return task


 async def _message_loop(websocket: WebSocket, user_id: str) -> None:
    """Receive frames from Electron and dispatch to the appropriate handler.

    Each text frame is parsed as JSON and routed on its ``type`` field.
    Request-style frames are handled in background tasks so the loop keeps
    reading; ``tool_result``, ``index_session_cancel`` and
    ``scout_proposal_ack`` are processed inline.  Frames that are not valid
    JSON, or whose JSON value is not an object, are logged and skipped.
    Unknown types are logged at debug level.

    Args:
        websocket: Accepted device socket to read from.
        user_id: Authenticated owner of the connection.
    """
    async for raw in websocket.iter_text():
        try:
            frame = json.loads(raw)
        except json.JSONDecodeError:
            logger.warning("device_ws: invalid JSON from user=%s", user_id)
            continue
        if not isinstance(frame, dict):
            # Valid JSON but not an object (e.g. a list or string): calling
            # .get on it would raise and tear down the whole loop.
            logger.warning("device_ws: non-object JSON frame from user=%s", user_id)
            continue
        frame_type = frame.get("type")
        if frame_type == WsFrameType.tool_result:
            call_id = frame.get("id")
            if call_id:
                device_manager.resolve_pending_call(user_id, call_id, frame)
            else:
                logger.warning(
                    "device_ws: tool_result missing id from user=%s", user_id
                )
        elif frame_type == WsFrameType.home_request:
            _spawn_handler(_handle_home_request(websocket, user_id, frame))
        elif frame_type == WsFrameType.brief_request:
            _spawn_handler(_handle_brief_request(websocket, user_id, frame))
        elif frame_type == WsFrameType.task_brief_request:
            _spawn_handler(_handle_task_brief_request(websocket, user_id, frame))
        elif frame_type == WsFrameType.journey_start:
            _spawn_handler(_handle_journey_start(websocket, user_id, frame))
        elif frame_type == WsFrameType.journey_message:
            _spawn_handler(_handle_journey_message(websocket, user_id, frame))
        elif frame_type == WsFrameType.index_session_start:
            _spawn_handler(_handle_index_session_start(websocket, user_id, frame))
        elif frame_type == WsFrameType.index_file_batch:
            _spawn_handler(_handle_index_file_batch(websocket, user_id, frame))
        elif frame_type == WsFrameType.index_session_cancel:
            # Awaited inline rather than spawned.
            await _handle_index_session_cancel(websocket, frame)
        elif frame_type == WsFrameType.contextual_request:
            _spawn_handler(_handle_contextual_request(websocket, user_id, frame))
        elif frame_type == WsFrameType.contextual_scope_update:
            _spawn_handler(_handle_contextual_scope_update(websocket, user_id, frame))
        elif frame_type == "scout_proposal_ack":
            proposal_id = frame.get("proposal_id")
            if proposal_id:
                try:
                    await ScoutEngine().ack_proposal(proposal_id)
                except Exception:
                    logger.exception("scout ack_proposal failed for %s", proposal_id)
        elif frame_type == "pong":
            # Heartbeat ack — nothing to do, connection is alive.
            pass
        else:
            logger.debug(
                "device_ws: unknown frame type %r from user=%s", frame_type, user_id
            )
 # ── v3 Chat Handlers ──────────────────────────────────────────────────
 async def _make_ws_executor(websocket: WebSocket, user_id: str):
    """Return a callback that sends tool_call frames and awaits tool_result.

    The returned coroutine function stamps ``payload["type"]`` with
    ``WsFrameType.tool_call`` (mutating the caller's dict), sends the payload
    on *websocket*, and resolves to the frame that ``device_manager`` delivers
    for ``payload["id"]``.
    """
    async def _executor(payload: dict) -> dict:
        payload["type"] = WsFrameType.tool_call
        # Register the pending call BEFORE sending: send_text yields to the
        # event loop, and a tool_result that arrives before the future exists
        # would find nothing to resolve, leaving this await hanging.
        # NOTE(review): if send_text raises, the pending call stays registered —
        # confirm device_manager drops pending calls on disconnect.
        future = device_manager.create_pending_call(user_id, payload["id"])
        await websocket.send_text(json.dumps(payload))
        return await future
    return _executor
 async def _handle_home_request(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Serve one ``home_request`` frame.

    Enriches the request with memory context, streams the home agent's
    formatted frames back over *websocket*, then stores the exchange as an
    episode.  A failure while streaming is logged and not re-raised; the
    episode is still stored with whatever text had been streamed.

    Args:
        websocket: Device socket that receives the streamed frames.
        user_id: Authenticated owner of the connection.
        frame: Parsed request; missing ``request_id`` / ``session_id`` values
            are replaced with fresh UUIDs.
    """
    request_id = frame.get("request_id") or str(uuid4())
    message: str = frame.get("message", "")
    session_id: str = frame.get("session_id") or str(uuid4())
    # Both snake_case and camelCase spellings of the project id are accepted.
    project_id: str | None = frame.get("project_id") or frame.get("projectId") or None
    logger.info(
        "device_ws: home_request_start user=%s req=%s session=%s project=%s msg=%s",
        user_id,
        request_id,
        session_id,
        project_id,
        message[:200],
    )

    # ── Memory: enrich context before LLM call ────────────────────────
    async with async_session() as db:
        memory_context = await MemoryMiddleware(db).enrich_context(
            user_id,
            message,
            trace_id=request_id,
            session_id=session_id,
        )
    debug_info = {"request_id": request_id, "session_id": session_id, "user_id": user_id}
    context: dict = {
        "conversation_history": frame.get("conversation_history", []),
        "_debug": debug_info,
        "format_prefs": frame.get("format_prefs"),
        **memory_context,
    }

    set_client_executor(await _make_ws_executor(websocket, user_id))
    streamed_text: list[str] = []
    try:
        events = run_home_stream(user_id, message, context, project_id=project_id)
        formatter = StreamFormatter(request_id=request_id)
        async for out_frame in formatter.format(events):
            await websocket.send_text(out_frame.model_dump_json())
            if out_frame.type != "stream_text":  # type: ignore[union-attr]
                continue
            # Text chunks are collected so the full reply can be stored below.
            streamed_text.append(out_frame.chunk)  # type: ignore[union-attr]
    except Exception as exc:
        logger.error(
            "device_ws: home_request failed user=%s req=%s: %s",
            user_id, request_id, exc,
        )
    finally:
        clear_client_executor()

    # ── Memory: store episode after response ──────────────────────────
    full_response = "".join(streamed_text)
    async with async_session() as db:
        await MemoryMiddleware(db).store_episode(
            user_id, session_id, message, full_response, trace_id=request_id
        )
    logger.info(
        "device_ws: home_request_end user=%s req=%s session=%s response_chars=%d",
        user_id,
        request_id,
        session_id,
        len(full_response),
    )
 # ── v8 Contextual Sidebar Handlers ───────────────────────────────────
 def get_session_buffer(user_id: str, session_id: str, channel: str = "contextual"):
    """Build a buffer proxy bound to one user's session.

    Wraps the shared ``session_buffer`` in a ``ContextualBufferProxy`` for
    *user_id* and *session_id*; callers use its ``append_system_message()``.
    Kept at module level so tests can monkeypatch it.  *channel* is accepted
    for forward-compatibility and is not used here.
    """
    from app.core.scout_session_buffer import ContextualBufferProxy as _Proxy  # noqa: PLC0415

    proxy = _Proxy(session_buffer, user_id, session_id)
    return proxy
 async def _handle_contextual_request(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Serve one ``contextual_request`` frame.

    Validates the frame's ``scope``, enriches the request with memory
    context, streams the contextual agent's formatted frames back over
    *websocket*, then stores the exchange as an episode.  A failure while
    streaming is logged and not re-raised; the episode is still stored with
    whatever text had been streamed.

    Args:
        websocket: Device socket that receives the streamed frames.
        user_id: Authenticated owner of the connection.
        frame: Parsed request; missing ``request_id`` / ``session_id`` values
            are replaced with fresh UUIDs.
    """
    request_id = frame.get("request_id") or str(uuid4())
    message: str = frame.get("message", "")
    session_id: str = frame.get("session_id") or str(uuid4())
    raw_scope: dict = frame.get("scope", {})
    logger.info(
        "device_ws: contextual_request_start user=%s req=%s session=%s msg=%s",
        user_id,
        request_id,
        session_id,
        message[:200],
    )
    scope = ContextualScope.model_validate(raw_scope)

    # Enrich context with memory before the LLM call.
    async with async_session() as db:
        memory_context = await MemoryMiddleware(db).enrich_context(
            user_id,
            message,
            trace_id=request_id,
            session_id=session_id,
        )
    debug_info = {"request_id": request_id, "session_id": session_id, "user_id": user_id}
    context: dict = {
        "conversation_history": frame.get("conversation_history", []),
        "format_prefs": frame.get("format_prefs"),
        "_debug": debug_info,
        **memory_context,
    }

    set_client_executor(await _make_ws_executor(websocket, user_id))
    streamed_text: list[str] = []
    try:
        events = run_contextual_stream(
            user_id=user_id,
            message=message,
            context=context,
            scope=scope,
        )
        formatter = StreamFormatter(request_id=request_id)
        async for out_frame in formatter.format(events):
            await websocket.send_text(out_frame.model_dump_json())
            if out_frame.type != "stream_text":  # type: ignore[union-attr]
                continue
            streamed_text.append(out_frame.chunk)  # type: ignore[union-attr]
    except Exception as exc:
        logger.error(
            "device_ws: contextual_request failed user=%s req=%s: %s",
            user_id, request_id, exc,
        )
    finally:
        clear_client_executor()

    # Store episode so the contextual agent can recall prior turns.
    full_response = "".join(streamed_text)
    async with async_session() as db:
        await MemoryMiddleware(db).store_episode(
            user_id, session_id, message, full_response, trace_id=request_id
        )
    logger.info(
        "device_ws: contextual_request_end user=%s req=%s session=%s response_chars=%d",
        user_id,
        request_id,
        session_id,
        len(full_response),
    )
 async def _handle_contextual_scope_update(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Record a navigation change for a contextual session.

    Validates the frame's ``scope``, renders it, and appends a synthetic
    system message to the session buffer so the next agent turn sees the new
    view.  No LLM call is made.  A ``contextual_scope_ack`` frame carrying the
    session id is sent back to the client.
    """
    session_id: str = frame.get("session_id") or str(uuid4())
    scope = ContextualScope.model_validate(frame.get("scope", {}))
    block = render_scope_block(scope)

    navigation_note = (
        f"User navigated to a new view. {block} Treat this as the new active context."
    )
    get_session_buffer(user_id, session_id, channel="contextual").append_system_message(
        navigation_note
    )

    ack = {
        "type": WsFrameType.contextual_scope_ack,
        "session_id": session_id,
    }
    await websocket.send_text(json.dumps(ack))
    logger.info(
        "device_ws: contextual_scope_update user=%s session=%s page=%s",
        user_id, session_id, scope.page,
    )
 async def _handle_brief_request(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Stream a plain-text brief for a brief_request frame over the socket.
    ``mode`` defaults to ``"home"``.  ``"project"`` additionally requires a
    ``project_id`` that parses as a UUID; otherwise a ``WsStreamEnd`` carrying
    the error is sent and the handler returns.  Any other mode value takes the
    home-brief path.  No episode is stored — briefs are not conversations.
    """
    import uuid as _uuid
    request_id = frame.get("request_id") or str(uuid4())
    session_id = frame.get("session_id") or str(uuid4())
    mode: str = frame.get("mode", "home")
    project_id: str | None = frame.get("project_id")
    wants_project_brief = mode == "project"
    logger.info(
        "device_ws: brief_request_start user=%s req=%s mode=%s project_id=%s",
        user_id, request_id, mode, project_id,
    )
    # Reject a missing or malformed project id before any memory lookup or
    # LLM work is started for it.
    if wants_project_brief:
        try:
            if not project_id:
                raise ValueError("project_id required for project mode")
            _uuid.UUID(project_id)
        except (ValueError, AttributeError) as exc:
            logger.warning(
                "device_ws: brief_request invalid project_id user=%s req=%s: %s",
                user_id, request_id, exc,
            )
            rejection = WsStreamEnd(request_id=request_id, error=str(exc))
            await websocket.send_text(rejection.model_dump_json())
            return
    # There is no user message for a brief, so memory is probed with an empty string.
    async with async_session() as db:
        memory_context = await MemoryMiddleware(db).enrich_context(
            user_id,
            "",
            trace_id=request_id,
            session_id=session_id,
        )
    debug_info = {"request_id": request_id, "session_id": session_id, "user_id": user_id}
    context: dict = {
        "_debug": debug_info,
        "format_prefs": frame.get("format_prefs"),
        **memory_context,
    }
    set_client_executor(await _make_ws_executor(websocket, user_id))
    try:
        event_stream = (
            run_project_brief(user_id, project_id, context)  # type: ignore[arg-type]
            if wants_project_brief
            else run_home_brief(user_id, context)
        )
        formatter = StreamFormatter(request_id=request_id)
        async for ws_frame in formatter.format(event_stream):
            await websocket.send_text(ws_frame.model_dump_json())
    except Exception as exc:
        logger.error(
            "device_ws: brief_request failed user=%s req=%s: %s",
            user_id, request_id, exc,
        )
        failure = WsStreamEnd(request_id=request_id, error=str(exc))
        await websocket.send_text(failure.model_dump_json())
    finally:
        # Always detach the executor, whether streaming succeeded or not.
        clear_client_executor()
    logger.info(
        "device_ws: brief_request_end user=%s req=%s mode=%s",
        user_id, request_id, mode,
    )
 # ── v6 Task Brief Handler ────────────────────────────────────────────
 async def _handle_task_brief_request(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Run the task-brief research stream for a task_brief_request frame.
    ``stream_start`` and ``stream_text`` frames are relayed to the client as
    they arrive; every other frame produced by the formatter is dropped.  Once
    the stream is exhausted a single ``WsStreamEnd`` is sent, carrying a
    ``canvas_draft`` mutation when ``extract_canvas_block`` finds canvas
    content in the accumulated text.  A missing task id, or a failure while
    streaming, ends the request with an error ``WsStreamEnd`` instead.
    """
    request_id = frame.get("request_id") or str(uuid4())
    session_id = frame.get("session_id") or str(uuid4())
    # Both snake_case and camelCase spellings of the ids are accepted.
    task_id: str = frame.get("task_id") or frame.get("taskId") or ""
    project_id: str | None = frame.get("project_id") or frame.get("projectId") or None
    logger.info(
        "device_ws: task_brief_request_start user=%s req=%s task=%s project=%s [cache_miss]",
        user_id, request_id, task_id, project_id,
    )
    if not task_id:
        missing_task = WsStreamEnd(request_id=request_id, error="task_id is required")
        await websocket.send_text(missing_task.model_dump_json())
        return
    async with async_session() as db:
        memory_context = await MemoryMiddleware(db).enrich_context(
            user_id,
            f"task brief: {task_id}",
            trace_id=request_id,
            session_id=session_id,
        )
    debug_info = {"request_id": request_id, "session_id": session_id, "user_id": user_id}
    context: dict = {
        "_debug": debug_info,
        "format_prefs": frame.get("format_prefs"),
        **memory_context,
    }
    set_client_executor(await _make_ws_executor(websocket, user_id))
    response_chunks: list[str] = []
    relayed_types = ("stream_text", "stream_start")
    try:
        event_stream = run_task_brief_research_stream(user_id, task_id, context, project_id=project_id)
        formatter = StreamFormatter(request_id=request_id)
        async for ws_frame in formatter.format(event_stream):
            frame_type = ws_frame.type
            if frame_type not in relayed_types:
                # The formatter's stream_end is withheld; this handler emits
                # its own below so it can attach mutations.
                continue
            if frame_type == "stream_text":
                response_chunks.append(ws_frame.chunk)  # type: ignore[union-attr]
            await websocket.send_text(ws_frame.model_dump_json())
    except Exception as exc:
        logger.error(
            "device_ws: task_brief_request failed user=%s req=%s task=%s: %s",
            user_id, request_id, task_id, exc,
        )
        failure = WsStreamEnd(request_id=request_id, error=str(exc))
        await websocket.send_text(failure.model_dump_json())
        return
    finally:
        clear_client_executor()
    # Pull the canvas block out of the full text, then close the stream.
    full_response = "".join(response_chunks)
    _visible, canvas_content, canvas_kind = extract_canvas_block(full_response)
    mutations: list[dict] | None = None
    if canvas_content:
        mutations = [{
            "type": "canvas_draft",
            "content": canvas_content,
            "kind": canvas_kind,
        }]
    closing_frame = WsStreamEnd(request_id=request_id, mutations=mutations)
    await websocket.send_text(closing_frame.model_dump_json())
    logger.info(
        "device_ws: task_brief_request_end user=%s req=%s task=%s response_chars=%d canvas=%s",
        user_id, request_id, task_id, len(full_response), canvas_kind or "none",
    )
 # ── v4 Journey Handlers ─────────────────────────────────────────────
 async def _handle_journey_start(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Dispatch a journey_start frame and send the resulting reply.
    A WS-backed client executor is installed for the duration of the call to
    ``handle_journey_start`` and removed afterwards.  Any exception is logged
    and reported to the client as a terminal ``journey_reply`` (``done=True``).
    """
    set_client_executor(await _make_ws_executor(websocket, user_id))
    try:
        reply = await handle_journey_start(user_id, frame)
        await websocket.send_text(json.dumps(reply))
    except Exception as exc:
        logger.error(
            "device_ws: journey_start failed user=%s: %s", user_id, exc
        )
        failure_reply = {
            "type": "journey_reply",
            "session_id": frame.get("session_id", ""),
            "message": f"Failed to start journey: {exc}",
            "done": True,
            "prompt_template": None,
        }
        await websocket.send_text(json.dumps(failure_reply))
    finally:
        clear_client_executor()
 async def _handle_journey_message(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Dispatch a journey_message frame and send the resulting reply.
    A WS-backed client executor is installed while ``handle_journey_message``
    runs and removed afterwards.  Any exception is logged and reported to the
    client as a terminal ``journey_reply`` (``done=True``).
    """
    set_client_executor(await _make_ws_executor(websocket, user_id))
    try:
        reply = await handle_journey_message(user_id, frame)
        await websocket.send_text(json.dumps(reply))
    except Exception as exc:
        session_id = frame.get("session_id", "")
        logger.error(
            "device_ws: journey_message failed user=%s session=%s: %s",
            user_id, session_id, exc,
        )
        failure_reply = {
            "type": "journey_reply",
            "session_id": session_id,
            "message": f"Journey error: {exc}",
            "done": True,
            "prompt_template": None,
        }
        await websocket.send_text(json.dumps(failure_reply))
    finally:
        clear_client_executor()
 # ── v7 Folder Index Handlers ──────────────────────────────────────────
 async def _handle_index_session_start(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Register a new folder index session.  No response is sent.
    The session is stored in ``_index_sessions`` under its id together with
    the owning user, the optional project id, a processed counter starting at
    zero, the announced file total and a ``cancelled`` flag.
    Args:
        websocket: Unused here; kept for a uniform handler signature.
        user_id: User that owns the new session.
        frame: Decoded frame; keys may be camelCase or snake_case.
    A frame without a session id is logged and ignored.  A file total that is
    missing, non-numeric or negative is stored as ``0`` (with a warning for
    the non-numeric case) instead of raising out of the handler.
    """
    session_id: str = frame.get("sessionId") or frame.get("session_id") or ""
    project_id: str | None = frame.get("projectId") or frame.get("project_id")
    if not session_id:
        logger.warning("device_ws: index_session_start missing sessionId user=%s", user_id)
        return
    raw_total = frame.get("totalFiles") or frame.get("total_files") or 0
    try:
        # The value comes straight from the client, so it may be any JSON type.
        total: int = max(int(raw_total), 0)
    except (TypeError, ValueError, OverflowError):
        logger.warning(
            "device_ws: index_session_start invalid totalFiles user=%s session=%s value=%r",
            user_id, session_id, raw_total,
        )
        total = 0
    _index_sessions[session_id] = {
        "user_id": user_id,
        "project_id": project_id,
        "processed": 0,
        "total": total,
        "cancelled": False,
    }
    logger.info(
        "device_ws: index_session_start user=%s session=%s project=%s total=%d",
        user_id, session_id, project_id, total,
    )
 async def _handle_index_session_cancel(
    websocket: WebSocket,
    frame: dict,
 ) -> None:
    """Cancel a folder index session and confirm it to the client.
    The stored session (if any) is flagged as cancelled before the
    ``index_session_done`` frame with status ``cancelled`` is sent, and is
    removed from ``_index_sessions`` afterwards.  The frame is sent even when
    no session is registered under the given id.
    """
    session_id: str = frame.get("sessionId") or frame.get("session_id") or ""
    active_session = _index_sessions.get(session_id)
    if active_session:
        # Flag first: a batch handler holding this dict checks the flag per file.
        active_session["cancelled"] = True
    done_frame = json.dumps({
        "type": WsFrameType.index_session_done,
        "sessionId": session_id,
        "status": "cancelled",
    })
    await websocket.send_text(done_frame)
    _index_sessions.pop(session_id, None)
    logger.info("device_ws: index_session_cancel session=%s", session_id)
 async def _handle_index_file_batch(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Summarise one batch of files for a folder index session.
    For every entry in ``frame["files"]`` the summariser matching its ``kind``
    is awaited, the tokens it used are recorded through ``add_token_usage``
    and an ``index_file_result`` frame is sent.  After the batch an
    ``index_session_progress`` frame follows, plus ``index_session_done``
    (``completed``) once the processed count reaches the session total.
    The batch is ignored without any reply when the session is unknown,
    already cancelled, or was registered by a different user.  Processing
    stops early when the session is cancelled mid-batch (no further frames)
    or when ``add_token_usage`` reports the quota as exhausted
    (``index_session_done`` with status ``quota_exceeded``).
    Args:
        websocket: Socket the result, progress and done frames are written to.
        user_id: User the token usage is attributed to.
        frame: Decoded frame; keys may be camelCase or snake_case.
    """
    # Lazy imports to avoid heavy load at module startup.
    from app.core.folder_indexer import (  # noqa: PLC0415
        summarize_image,
        summarize_pdf,
        summarize_docx,
        summarize_text,
    )
    from app.billing.tier_manager import tier_manager  # noqa: PLC0415
    from app.billing.quota import add_token_usage  # noqa: PLC0415
    session_id: str = frame.get("sessionId") or frame.get("session_id") or ""
    files: list[dict] = frame.get("files", [])
    session = _index_sessions.get(session_id)
    if not session or session.get("cancelled"):
        return
    # Sessions are keyed by a client-chosen id; only the user that registered
    # the session may feed files into it (and be billed for them).
    if session.get("user_id") != user_id:
        logger.warning(
            "device_ws: index_file_batch session owner mismatch user=%s session=%s",
            user_id, session_id,
        )
        return
    async with async_session() as db:
        tier = await tier_manager.get_tier(user_id, db)
        raw_cap = tier_manager.get_feature_value(tier, "folder_monthly_tokens")
        # A feature value of -1 is passed on as "no cap" (None).
        cap: int | None = None if raw_cap == -1 else raw_cap
        for file_info in files:
            # Re-check per file: a cancel frame may arrive while a summary is awaited.
            if session.get("cancelled"):
                return
            # Electron's toSnakeCase converts payload keys, so accept both forms.
            rel_path: str = file_info.get("relPath") or file_info.get("rel_path") or ""
            kind: str = file_info.get("kind") or "text"
            content: str = file_info.get("content") or ""
            ext: str = file_info.get("ext") or ""
            mime: str = file_info.get("mime") or "application/octet-stream"
            name: str = rel_path.split("/")[-1] or rel_path
            try:
                if kind == "image":
                    res = await summarize_image(image_b64=content, mime=mime)
                elif kind == "pdf":
                    res = await summarize_pdf(pdf_b64=content, name=name)
                elif kind == "docx":
                    res = await summarize_docx(docx_b64=content, name=name)
                else:
                    res = await summarize_text(content=content, ext=ext, name=name)
            except Exception as exc:
                # A failed file is reported and still counted as processed so
                # the session can reach its total.
                logger.warning(
                    "device_ws: index_file_batch summarize failed session=%s path=%s: %s",
                    session_id, rel_path, exc,
                )
                await websocket.send_text(json.dumps({
                    "type": WsFrameType.index_file_result,
                    "sessionId": session_id,
                    "relPath": rel_path,
                    "summary": None,
                    "tokensUsed": 0,
                    "error": str(exc),
                }))
                session["processed"] += 1
                continue
            # Account for token usage and check cap.
            usage = await add_token_usage(
                user_id=user_id,
                feature="folder_index",
                tokens=res.tokens_used,
                db=db,
                cap=cap,
            )
            await websocket.send_text(json.dumps({
                "type": WsFrameType.index_file_result,
                "sessionId": session_id,
                "relPath": rel_path,
                "summary": res.summary,
                "tokensUsed": res.tokens_used,
            }))
            session["processed"] += 1
            if usage.exhausted:
                # The file that exhausted the quota has already been reported above.
                await websocket.send_text(json.dumps({
                    "type": WsFrameType.index_session_done,
                    "sessionId": session_id,
                    "status": "quota_exceeded",
                }))
                _index_sessions.pop(session_id, None)
                logger.info(
                    "device_ws: index_session quota_exceeded user=%s session=%s",
                    user_id, session_id,
                )
                return
        # After processing the batch, emit progress.
        processed = session["processed"]
        total = session["total"]
        await websocket.send_text(json.dumps({
            "type": WsFrameType.index_session_progress,
            "sessionId": session_id,
            "processed": processed,
            "total": total,
        }))
        if processed >= total:
            await websocket.send_text(json.dumps({
                "type": WsFrameType.index_session_done,
                "sessionId": session_id,
                "status": "completed",
            }))
            _index_sessions.pop(session_id, None)
            logger.info(
                "device_ws: index_session_done completed user=%s session=%s processed=%d",
                user_id, session_id, processed,
            )
 # ── Heartbeat ─────────────────────────────────────────────────────────
 async def _heartbeat_loop(websocket: WebSocket) -> None:
    """Send a ``ping`` frame every ``_HEARTBEAT_INTERVAL`` seconds, forever.
    The loop has no exit condition of its own; it ends only when the sleep or
    the send raises.
    """
    ping_frame = json.dumps({"type": "ping"})
    while True:
        await asyncio.sleep(_HEARTBEAT_INTERVAL)
        await websocket.send_text(ping_frame)
 # ── Disconnect cleanup ────────────────────────────────────────────────
 async def _mark_runs_disconnected(user_id: str) -> None:
    """Flip this user's ``running`` ScoutRunLog rows to ``error``.
    The affected rows get ``errors=["device disconnected"]``.  Failures are
    logged and swallowed, so this function never raises an ``Exception``.
    """
    try:
        async with async_session() as db:
            close_open_runs = (
                update(ScoutRunLog)
                .where(
                    ScoutRunLog.user_id == user_id,
                    ScoutRunLog.status == "running",
                )
                .values(
                    status="error",
                    errors=["device disconnected"],
                )
            )
            await db.execute(close_open_runs)
            await db.commit()
    except Exception as exc:
        logger.error(
            "device_ws: failed to mark runs as disconnected for user=%s: %s",
            user_id,
            exc,
        )
--- a/app/api/routes/memory.py
+++ b/app/api/routes/memory.py
@@ -0,0 +1,225 @@
 """Memory management routes — view/edit/delete user memory tiers.
 All routes require authentication. Data is always user-scoped.
 """
 from __future__ import annotations
 import logging
 from datetime import datetime, timezone
 from typing import Annotated
 from fastapi import APIRouter, Depends, Header, HTTPException, status
 from pydantic import BaseModel, Field
 from sqlalchemy import delete, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.api.deps import get_current_user
 from app.core.memory_middleware import MemoryMiddleware
 from app.db import get_session
 from app.models import (
    ExtractionQueue,
    MemoryAssociative,
    MemoryCore,
    MemoryEpisodic,
    MemoryProactive,
    MemoryRelation,
 )
 from app.schemas import UserProfile
 router = APIRouter(prefix="/memory", tags=["memory"])
 logger = logging.getLogger(__name__)
 # Predicates a client may set when patching a relation; patch_relation
 # rejects any other value with HTTP 422 and lists these (sorted) in the error.
 _ALLOWED_PREDICATES = {
    "works_at",
    "reports_to",
    "stakeholder_of",
    "last_contacted_on",
    "owes_followup",
    "manages",
    "collaborates_with",
    "owns",
    "member_of",
    "custom",
 }
 # ── Response schemas ─────────────────────────────────────────────────────────
 class RelationOut(BaseModel):
    # Response shape for a single relational-memory row; instances are built
    # by _relation_to_out from a MemoryRelation row.
    id: str
    subject_label: str
    subject_type: str
    predicate: str
    object_label: str
    object_type: str
    confidence: float
    # None when the row has no confirmation timestamp.
    last_confirmed_at: int | None = None  # epoch ms
 class RelationPatch(BaseModel):
    # Partial update for a relation: fields left as None are not modified by
    # patch_relation.
    subject_label: str | None = None
    object_label: str | None = None
    # Not validated here; patch_relation checks it against _ALLOWED_PREDICATES.
    predicate: str | None = None
    # Must lie within [0.0, 1.0] when provided.
    confidence: float | None = Field(None, ge=0.0, le=1.0)
 class CoreAddBody(BaseModel):
    # Request body for POST /memory/core.
    # key: 1-255 characters; value: at least one character.
    key: str = Field(..., min_length=1, max_length=255)
    value: str = Field(..., min_length=1)
 # ── Helpers ──────────────────────────────────────────────────────────────────
 def _relation_to_out(row: MemoryRelation) -> RelationOut:
    """Convert a ``MemoryRelation`` row into its ``RelationOut`` response model.
    ``last_confirmed_at`` is exported as integer epoch milliseconds, or
    ``None`` when the row carries no confirmation timestamp.
    """
    confirmed_at = row.last_confirmed_at
    # NOTE(review): datetime.timestamp() reads a naive datetime as local time —
    # confirm the column yields timezone-aware values.
    epoch_ms: int | None = (
        None if confirmed_at is None else int(confirmed_at.timestamp() * 1000)
    )
    return RelationOut(
        id=row.id,
        subject_label=row.subject_label,
        subject_type=row.subject_type,
        predicate=row.predicate,
        object_label=row.object_label,
        object_type=row.object_type,
        confidence=row.confidence,
        last_confirmed_at=epoch_ms,
    )
 # ── Routes ───────────────────────────────────────────────────────────────────
@router.get("/core", response_model=dict[str, str])
 async def get_core_memory(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, str]:
    """Return all core memory k/v pairs (plaintext) for the current user."""
    mw = MemoryMiddleware(db)
    blocks = await mw.list_core_blocks(current_user.id)
    return {b["label"]: b["value"] for b in blocks}
@router.delete("/core/{key}", status_code=status.HTTP_204_NO_CONTENT)
 async def delete_core_key(
    key: str,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> None:
    """Delete a single core memory key (GDPR Art. 17)."""
    mw = MemoryMiddleware(db)
    deleted = await mw.delete_core(current_user.id, key)
    if not deleted:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Key not found")
@router.post("/core", status_code=status.HTTP_201_CREATED, response_model=dict[str, str])
 async def add_core_key(
    body: CoreAddBody,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> dict[str, str]:
    """Add or overwrite a core memory key/value pair."""
    mw = MemoryMiddleware(db)
    await mw.update_core(current_user.id, body.key, body.value)
    return {body.key: body.value}
@router.get("/relational", response_model=list[RelationOut])
 async def get_relational_memory(
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> list[RelationOut]:
    """Return all relational memory rows for the current user."""
    mw = MemoryMiddleware(db)
    rows = await mw.query_relations(current_user.id, limit=200)
    return [_relation_to_out(r) for r in rows]
@router.patch("/relational/{relation_id}", response_model=RelationOut)
 async def patch_relation(
    relation_id: str,
    body: RelationPatch,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> RelationOut:
    """Edit a relation row's labels, predicate, or confidence."""
    if body.predicate is not None and body.predicate not in _ALLOWED_PREDICATES:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"predicate must be one of: {sorted(_ALLOWED_PREDICATES)}",
        )
    result = await db.execute(
        select(MemoryRelation).where(
            MemoryRelation.id == relation_id,
            MemoryRelation.user_id == current_user.id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Relation not found")
    if body.subject_label is not None:
        row.subject_label = body.subject_label
    if body.object_label is not None:
        row.object_label = body.object_label
    if body.predicate is not None:
        row.predicate = body.predicate
    if body.confidence is not None:
        row.confidence = body.confidence
        row.last_confirmed_at = datetime.now(timezone.utc)
    await db.commit()
    await db.refresh(row)
    logger.info("memory: patch_relation user=%s relation=%s", current_user.id, relation_id)
    return _relation_to_out(row)
@router.delete("/relational/{relation_id}", status_code=status.HTTP_204_NO_CONTENT)
 async def delete_relation(
    relation_id: str,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> None:
    """Hard-delete a relation row (GDPR Art. 17)."""
    result = await db.execute(
        select(MemoryRelation).where(
            MemoryRelation.id == relation_id,
            MemoryRelation.user_id == current_user.id,
        )
    )
    row = result.scalar_one_or_none()
    if row is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Relation not found")
    await db.delete(row)
    await db.commit()
    logger.info("memory: delete_relation user=%s relation=%s", current_user.id, relation_id)
@router.post("/forget-all", status_code=status.HTTP_204_NO_CONTENT)
 async def forget_all(
    x_confirm: Annotated[str | None, Header(alias="X-Confirm")] = None,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> None:
    """Wipe all memory tiers for the current user (GDPR Art. 17).
    Requires ``X-Confirm: true`` header. Does NOT delete the user account.
    """
    if x_confirm != "true":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing or invalid X-Confirm header. Send X-Confirm: true to confirm.",
        )
    uid = current_user.id
    await db.execute(delete(MemoryCore).where(MemoryCore.user_id == uid))
    await db.execute(delete(MemoryAssociative).where(MemoryAssociative.user_id == uid))
    await db.execute(delete(MemoryEpisodic).where(MemoryEpisodic.user_id == uid))
    await db.execute(delete(MemoryProactive).where(MemoryProactive.user_id == uid))
    await db.execute(delete(MemoryRelation).where(MemoryRelation.user_id == uid))
    await db.execute(delete(ExtractionQueue).where(ExtractionQueue.user_id == uid))
    await db.commit()
    logger.warning("memory: forget_all GDPR wipe user=%s", uid)
--- a/app/api/routes/scout_setup.py
+++ b/app/api/routes/scout_setup.py
@@ -0,0 +1,513 @@
 """Chatbot Journey — WS-based guided conversation to build an ScoutConfig.
 The journey is driven entirely through WebSocket frames (no REST endpoints).
 The device WS handler dispatches ``journey_start`` and ``journey_message``
 frames to the functions exported here.
 Journey flow:
  1. FE sends ``journey_start`` frame with basic agent info (directory,
     data_types, schedule).
  2. Server creates an in-memory session, sets up a WS executor so the
     setup LLM can use file-system tools, does a first directory scrape,
     and sends back a ``journey_reply`` with the first question.
  3. FE sends ``journey_message`` frames for each user reply.
  4. Server appends the user message, calls the LLM (which may read files
     via tools), and sends back a ``journey_reply``.
  5. After 3-5 turns the LLM wraps up by emitting a ``ScoutConfig`` JSON
     block delimited by ``AGENT_CONFIG_START`` / ``AGENT_CONFIG_END``.
  6. Server parses and validates the JSON with Pydantic, sends
     ``journey_reply`` with ``done=True`` and the serialised config.
     FE stores it locally.
 """
 from __future__ import annotations
 import json
 import logging
 import time
 import uuid
 from dataclasses import dataclass, field
 from typing import Any
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from app.agents.filesystem_agent import make_directory_tools
 from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback, langfuse_context
 from app.core.llm import get_agent_llm, model_for_agent
 from app.schemas import ScoutConfig
 logger = logging.getLogger(__name__)
 # ── Session TTL ───────────────────────────────────────────────────────────
 # Maximum age of a JourneySession, compared against time.monotonic() in
 # JourneySession.is_expired.
 _SESSION_TTL_SECONDS: int = 1800  # 30 minutes
 # Sentinel strings used to delimit the LLM-produced ScoutConfig JSON.
 # They are substituted into the system prompt and searched for by
 # _extract_agent_config.
 _CONFIG_START = "AGENT_CONFIG_START"
 _CONFIG_END = "AGENT_CONFIG_END"
 # Minimum turns before we consider nudging the LLM to wrap up.
 _MIN_TURNS_BEFORE_NUDGE: int = 3
 # Hard cap to avoid infinite loops (safety net, not the primary stopping criterion).
 _MAX_TURNS: int = 15
 # Max tool-calling steps per LLM invocation (loop bound in _call_llm_with_tools).
 _MAX_TOOL_STEPS: int = 6
 # ── In-memory session store ───────────────────────────────────────────────
@dataclass
 class JourneySession:
    session_id: str
    user_id: str
    agent_type: str  # "local" | "cloud"
    directory: str
    data_types: list[str]
    history: list[dict[str, Any]] = field(default_factory=list)
    system_prompt: str = ""
    langfuse_prompt: Any = None
    created_at: float = field(default_factory=time.monotonic)
    def is_expired(self) -> bool:
        return (time.monotonic() - self.created_at) > _SESSION_TTL_SECONDS
 # session_id → session
 # Module-level dict; get_journey_session drops an entry when it finds it expired.
 _sessions: dict[str, JourneySession] = {}
 def get_journey_session(session_id: str, user_id: str) -> JourneySession | None:
    """Look up a live journey session owned by ``user_id``.
    Returns ``None`` when the id is unknown, when the session has expired
    (the stale entry is dropped from the store), or when it belongs to a
    different user (the entry is left in place).
    """
    session = _sessions.get(session_id)
    if session is not None and not session.is_expired():
        return session if session.user_id == user_id else None
    # Unknown or expired: make sure no stale entry lingers under this id.
    _sessions.pop(session_id, None)
    return None
 # ── System prompt ─────────────────────────────────────────────────────────
 # Fallback template for the journey system prompt: _build_system_prompt hands
 # it to get_prompt_or_fallback and fills the placeholders via compile_prompt.
 # Placeholders: {directory}, {data_types}, {data_types_json}, {config_start},
 # {config_end}, {existing_section}.
 # NOTE(review): the doubled braces around the JSON example look like
 # str.format-style escapes — confirm compile_prompt renders them as single braces.
 _JOURNEY_SYSTEM_PROMPT = """\
 You are a friendly assistant helping a freelancer configure a data-extraction agent.
 Your job is to understand what files the user has in their directory and produce a
 structured ScoutConfig JSON that the extraction agent will use as its instruction set.
 You have access to file-system tools to explore the user's directory:
 - list_directory: see folder structure and file names
 - read_file_content: peek at a file's content
 - get_file_metadata: check file size, extension, dates
 The user's configured directory is: {directory}
 Target data types: {data_types}
 ## Your process
 ### Step 1 — Explore the directory
 Use list_directory and read_file_content to understand what types of files are present
 (HTML emails, plain-text documents, CSVs, etc.).
 ### Step 2 — Identify content types
 For each distinct file type found, decide:
 - A short id (e.g. "email_html", "plain_text", "csv")
 - Which preprocessing handler to use: "email_html" for HTML emails, "generic" for everything else
 - A human-readable label and optional detection_hint
 ### Step 3 — Ask focused questions (one at a time)
 Cover these topics based on what you discovered:
 1. How to map content to entity types (task / note / timeline entry)
 2. Field mapping rules (e.g. email Subject → task title, filename → note title)
 3. Priority or status rules (e.g. "urgent" in subject → high priority)
 4. Date extraction (e.g. "by Friday" → dueDate)
 5. Exclusion rules (e.g. skip newsletters, skip files with no project match)
 ### Step 4 — Produce the ScoutConfig JSON
 Once you are ≥ 90% confident, output the final config between these exact markers
 (each on its own line):
 {config_start}
 {{
  "content_types": [
    {{
      "id": "email_html",
      "label": "Email HTML",
      "detection_hint": "HTML file with From/To/Subject headers",
      "preprocessing": "email_html",
      "extraction_prompt": "Detailed extraction instructions for this content type..."
    }}
  ],
  "global_rules": [
    "If the file cannot be matched to any project, do not create any entity."
  ],
  "data_types": {data_types_json}
 }}
 {config_end}
 ## Rules for the extraction_prompt field
 - Describe when to create a task vs note vs timeline entry (be specific and concrete)
 - Include field mapping rules based on what you found in the directory
 - Include priority/status/date rules if applicable
 - Do NOT include projectId logic — the runner handles project assignment automatically
 - Do NOT mention isAiSuggested — the runner always sets it to 1
 ## Constraints
 - Never ask about projects, projectId, or how to link records to projects
 - Never include projectId or project creation logic in the generated config
 - Keep asking questions until ≥ 90% confident, then output the JSON immediately
 {existing_section}\
 Begin by exploring the directory, then ask your first question.\
 """
 def _build_system_prompt(
    directory: str,
    data_types: list[str],
    existing_config: str | None = None,
 ) -> tuple[str, Any]:
    """Compile the journey system prompt for one session.
    The template is resolved through ``get_prompt_or_fallback`` under the name
    ``journey_system`` with ``_JOURNEY_SYSTEM_PROMPT`` as fallback, then filled
    in by ``compile_prompt``.  A non-empty ``existing_config`` adds a section
    asking the model to refine that config.
    Returns:
        ``(compiled_prompt, prompt_obj)`` where ``prompt_obj`` is whatever
        ``get_prompt_or_fallback`` returned alongside the template.
    """
    if existing_config:
        existing_section = (
            "\nThe user already has the following ScoutConfig — refine it based on their answers:\n"
            f"```json\n{existing_config}\n```\n"
        )
    else:
        existing_section = ""
    template, prompt_obj = get_prompt_or_fallback(
        "journey_system", _JOURNEY_SYSTEM_PROMPT
    )
    # data_types is supplied twice: comma-joined for the prose line and
    # JSON-encoded for the example config.
    template_values = {
        "directory": directory,
        "data_types": ", ".join(data_types),
        "data_types_json": json.dumps(data_types),
        "config_start": _CONFIG_START,
        "config_end": _CONFIG_END,
        "existing_section": existing_section,
    }
    return compile_prompt(template, prompt_obj, **template_values), prompt_obj
 # ── ScoutConfig extraction ────────────────────────────────────────────────
 def _extract_agent_config(text: str) -> str | None:
    """Return the validated ScoutConfig JSON found between the config markers.
    The text between the first ``_CONFIG_START`` marker and the next
    ``_CONFIG_END`` marker after it is stripped, validated with
    ``ScoutConfig.model_validate_json`` and re-serialised with
    ``model_dump_json``.
    Args:
        text: Raw LLM output that may contain a delimited config block.
    Returns:
        The re-serialised JSON string, or ``None`` when the start marker is
        missing, no end marker follows it, the delimited text is empty, or
        validation fails (a warning is logged in that last case).
    """
    _before, start_marker, remainder = text.partition(_CONFIG_START)
    if not start_marker:
        return None
    # Search for the end marker only after the start marker, so an earlier
    # stray mention of it cannot hide a well-formed block that follows.
    raw, end_marker, _after = remainder.partition(_CONFIG_END)
    if not end_marker:
        return None
    raw = raw.strip()
    if not raw:
        return None
    try:
        parsed = ScoutConfig.model_validate_json(raw)
        return parsed.model_dump_json()
    except Exception as exc:
        logger.warning("agent_setup: failed to parse ScoutConfig JSON: %s", exc)
        return None
 # ── LLM call with tool support ───────────────────────────────────────────
 def _as_text(content: Any) -> str:
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict):
                text = item.get("text")
                if isinstance(text, str):
                    parts.append(text)
        return "".join(parts)
    return str(content)
 async def _call_llm_with_tools(
    system_prompt: str,
    history: list[dict[str, Any]],
    tools: list[Any],
    *,
    user_id: str = "",
    session_id: str = "",
    langfuse_prompt: Any = None,
 ) -> str:
    """Run the setup LLM over ``history`` with tool support and return its reply.

    The history is replayed as LangChain messages after a system message.  The
    model is then invoked in a loop: whenever it requests tool calls they are
    executed and their results appended as ``ToolMessage`` entries, until the
    model answers with plain text or ``_MAX_TOOL_STEPS`` iterations have
    elapsed.  In the latter case the model is invoked once more *without*
    tools to obtain a textual answer.

    When ``get_langfuse()`` returns a client, the exchange is recorded as a
    span with one generation observation per tool-enabled LLM call, and the
    client is flushed before returning.

    Args:
        system_prompt: Content of the leading system message.
        history: Prior turns as dicts with ``role`` and ``content`` keys.  A
            role of ``"user"`` becomes a human message; anything else becomes
            an AI message.
        tools: Tool objects exposing ``name`` and ``ainvoke``.
        user_id: Forwarded to ``langfuse_context`` (empty string -> ``None``).
        session_id: Forwarded to ``langfuse_context`` (empty string -> ``None``).
        langfuse_prompt: Passed as ``prompt`` to each generation observation.

    Returns:
        The model's final text, or a canned apology when the fallback call
        after exhausting the tool steps returns no text.
    """
    # Function-scope import: stdlib only, keeps the block self-contained.
    import contextlib

    lf = get_langfuse()
    messages: list[Any] = [SystemMessage(content=system_prompt)]
    for turn in history:
        if turn["role"] == "user":
            messages.append(HumanMessage(content=turn["content"]))
        else:
            messages.append(AIMessage(content=turn["content"]))
    llm = get_agent_llm("setup", temperature=0.4)
    llm_with_tools = llm.bind_tools(tools)
    tool_map = {tool_def.name: tool_def for tool_def in tools}
    try:
        # ExitStack unwinds the span and then the Langfuse context in reverse
        # order on every exit path, including a failure while entering the span.
        with contextlib.ExitStack() as stack:
            stack.enter_context(
                langfuse_context(user_id=user_id or None, session_id=session_id or None)
            )
            span = (
                stack.enter_context(
                    lf.start_as_current_observation(
                        as_type="span",
                        name="journey-setup",
                        input=history[-1]["content"] if history else "",
                    )
                )
                if lf else None
            )
            for step in range(_MAX_TOOL_STEPS):
                generation_ctx = (
                    lf.start_as_current_observation(
                        as_type="generation",
                        name="journey-setup-llm",
                        model=model_for_agent("setup"),
                        prompt=langfuse_prompt,
                        input=messages,
                    )
                    if lf else contextlib.nullcontext()
                )
                # ``with`` closes the generation observation even when the
                # LLM call raises, so no observation is left open on errors.
                with generation_ctx as generation:
                    response: AIMessage = await llm_with_tools.ainvoke(messages)
                    resp_text = _as_text(response.content)
                    if generation is not None:
                        generation.update(
                            output=resp_text,
                            usage_details=extract_usage(response),
                        )
                # Guard against empty responses (e.g. model returned finish_reason
                # 'error' which LiteLLM maps to 'stop' with empty content).
                if not response.tool_calls and not resp_text.strip():
                    logger.warning(
                        "agent_setup: journey LLM returned empty response at step %d — retrying",
                        step,
                    )
                    # The empty AIMessage is not appended, so it cannot pollute
                    # the conversation; the retry consumes one step.
                    continue
                messages.append(response)
                if not response.tool_calls:
                    if span:
                        span.update(output=resp_text)
                    return resp_text
                for call in response.tool_calls:
                    call_name = str(call.get("name", ""))
                    call_args = call.get("args", {})
                    logger.info(
                        "agent_setup: journey tool_call name=%s args=%s",
                        call_name,
                        json.dumps(call_args, ensure_ascii=True)[:500],
                    )
                    tool_fn = tool_map.get(call_name)
                    if tool_fn is None:
                        # Report the unknown tool back to the model rather
                        # than failing the whole exchange.
                        tool_output = f"Unknown tool: {call_name}"
                    else:
                        tool_output = await tool_fn.ainvoke(call_args)
                    logger.info(
                        "agent_setup: journey tool_result name=%s output=%s",
                        call_name,
                        str(tool_output)[:800],
                    )
                    messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
            # Fallback: exceeded max steps — ask once more without tools bound.
            final = await llm.ainvoke(messages)
            final_text = _as_text(final.content)
            if span:
                span.update(output=final_text)
            return final_text or (
                "Sorry, I had trouble processing the files. "
                "Could you try again? If the issue persists, the files might be too large for me to analyse."
            )
    finally:
        if lf:
            lf.flush()
 # ── Journey handlers (called from device_ws.py) ──────────────────────────
 async def handle_journey_start(
    user_id: str,
    frame: dict[str, Any],
 ) -> dict[str, Any]:
    """Open a setup journey in response to a ``journey_start`` WS frame.

    Builds the system prompt from the frame, runs the setup LLM once with the
    directory tools and a fixed opening user turn, registers the session in
    ``_sessions`` and returns the ``journey_reply`` payload.  If the very first
    reply already carries a valid config the session is removed again and the
    payload is marked ``done``.

    Args:
        user_id: Owner of the new session.
        frame: Incoming frame; reads ``agent_type``, ``directory``,
            ``data_types``, ``existing_config`` and ``session_id``.

    Returns:
        A dict with ``type``, ``session_id``, ``message``, ``done`` and
        ``agent_config`` keys.
    """
    directory = frame.get("directory", "")
    data_types = frame.get("data_types", [])
    # Prefer the id chosen by the FE so the reply matches its listener key;
    # generate one only when the frame carries none.
    session_id = frame.get("session_id") or str(uuid.uuid4())
    system_prompt, langfuse_prompt = _build_system_prompt(
        directory,
        data_types,
        frame.get("existing_config"),
    )
    session = JourneySession(
        session_id=session_id,
        user_id=user_id,
        agent_type=frame.get("agent_type", "local"),
        directory=directory,
        data_types=data_types,
        system_prompt=system_prompt,
        langfuse_prompt=langfuse_prompt,
    )
    # Some providers reject a conversation with no user/input message, so the
    # exchange is seeded with a canned opening turn.
    opening_turn: dict[str, Any] = {
        "role": "user",
        "content": "Hi, I'm ready to set up my agent. Please explore my directory and ask me your first question.",
    }
    ai_reply = await _call_llm_with_tools(
        system_prompt=system_prompt,
        history=[opening_turn],
        tools=make_directory_tools(directory),
        user_id=user_id,
        session_id=session_id,
        langfuse_prompt=langfuse_prompt,
    )
    session.history.extend([opening_turn, {"role": "assistant", "content": ai_reply}])
    _sessions[session_id] = session
    logger.info(
        "agent_setup: journey session %s started for user %s (directory=%s)",
        session_id,
        user_id,
        directory,
    )
    # A config on the first turn is unlikely but possible.
    agent_config = _extract_agent_config(ai_reply)
    if agent_config is None:
        done = False
        display_message = ai_reply
    else:
        done = True
        preamble = ai_reply[: ai_reply.index(_CONFIG_START)].strip()
        display_message = (
            preamble
            or "Here is your agent configuration. You can save it or continue refining."
        )
        _sessions.pop(session_id, None)
    return {
        "type": "journey_reply",
        "session_id": session_id,
        "message": display_message,
        "done": done,
        "agent_config": agent_config,
    }
 async def handle_journey_message(
    user_id: str,
    frame: dict[str, Any],
 ) -> dict[str, Any]:
    """Handle a ``journey_message`` WS frame.

    Appends the user message to the session history, calls the setup LLM with
    the directory tools and returns the ``journey_reply`` payload.  Once the
    number of user turns reaches ``_MAX_TURNS`` without a config, one extra
    "generate the config now" turn is sent; its reply replaces the displayed
    reply only when it actually contains a valid config.

    Args:
        user_id: User the session must belong to (checked by
            ``get_journey_session``).
        frame: Incoming frame; reads ``session_id`` and ``message``.

    Returns:
        A dict with ``type``, ``session_id``, ``message``, ``done`` and
        ``agent_config`` keys.  An unknown session yields ``done=True`` with
        an explanatory message and no config.
    """
    session_id = frame.get("session_id", "")
    message = frame.get("message", "")
    session = get_journey_session(session_id, user_id)
    if session is None:
        return {
            "type": "journey_reply",
            "session_id": session_id,
            "message": "Journey session not found or expired. Please start a new setup.",
            "done": True,
            "agent_config": None,
        }
    # Append user turn.
    session.history.append({"role": "user", "content": message})
    # Call the LLM with tools.
    session_tools = make_directory_tools(session.directory)
    ai_reply = await _call_llm_with_tools(
        system_prompt=session.system_prompt,
        history=session.history,
        tools=session_tools,
        user_id=session.user_id,
        session_id=session_id,
        langfuse_prompt=session.langfuse_prompt,
    )
    session.history.append({"role": "assistant", "content": ai_reply})
    # Check if the LLM produced the final config.
    agent_config = _extract_agent_config(ai_reply)
    done = agent_config is not None
    # If the LLM didn't produce a config, nudge it once it hits the hard safety cap.
    if not done:
        turns = sum(1 for t in session.history if t["role"] == "user")
        if turns >= _MAX_TURNS:
            nudge_content = (
                "[System: You have enough information. Please generate the final "
                f"ScoutConfig JSON now, wrapped in {_CONFIG_START} / {_CONFIG_END} markers.]"
            )
            session.history.append({"role": "user", "content": nudge_content})
            nudge_reply = await _call_llm_with_tools(
                system_prompt=session.system_prompt,
                history=session.history,
                tools=session_tools,
                user_id=session.user_id,
                session_id=session_id,
                langfuse_prompt=session.langfuse_prompt,
            )
            session.history.append({"role": "assistant", "content": nudge_reply})
            agent_config = _extract_agent_config(nudge_reply)
            if agent_config is not None:
                done = True
                # Only a successful nudge replaces what the user sees.
                ai_reply = nudge_reply
    display_message = ai_reply
    if done:
        # Show only the text before the config block.  A config was extracted,
        # so the start marker is present; when nothing precedes it the reply
        # would otherwise be an empty message, so fall back to the same
        # friendly text that handle_journey_start uses.
        preamble = ai_reply.partition(_CONFIG_START)[0].strip()
        display_message = (
            preamble
            or "Here is your agent configuration. You can save it or continue refining."
        )
        _sessions.pop(session_id, None)
        logger.info("agent_setup: journey session %s completed for user %s", session_id, user_id)
    return {
        "type": "journey_reply",
        "session_id": session_id,
        "message": display_message,
        "done": done,
        "agent_config": agent_config,
    }
--- a/app/api/routes/scout_webhooks.py
+++ b/app/api/routes/scout_webhooks.py
@@ -0,0 +1,120 @@
 """Gmail Pub/Sub push receiver.
 Google Pub/Sub push subscriptions deliver Gmail watch notifications as POST
 requests with a JSON envelope. The body payload contains a base64-encoded
 JSON blob with ``emailAddress`` + ``historyId``. We resolve the user by
 email, look up their cloud_scout_configs row for provider='gmail', and
 hand off to ScoutEngine.trigger_scout.
 Authentication: Pub/Sub push includes an OIDC JWT in the Authorization
 header. We verify it against Google's public keys with the audience
 configured in our Pub/Sub subscription.
 Dev mode: when ``GMAIL_PUBSUB_AUDIENCE`` is empty, JWT verification is
 skipped and a warning is logged. Production must set this env var.
 """
 from __future__ import annotations
 import base64
 import json
 import logging
 import uuid
 from fastapi import APIRouter, Header, HTTPException, Request, status
 from sqlalchemy import select
 from app.config.settings import settings
 from app.db import async_session
 from app.models import CloudScoutConfig, User
 from app.scouts.engine import ScoutEngine
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Routes declared on this router are served under the /scouts/webhooks prefix.
 router = APIRouter(prefix="/scouts/webhooks", tags=["scout-webhooks"])
 def _verify_pubsub_jwt(token: str) -> bool:
    """Check the OIDC JWT that accompanies a Google Pub/Sub push request.

    An empty token is always rejected.  When ``settings.GMAIL_PUBSUB_AUDIENCE``
    is empty, verification is skipped with a warning and the token is accepted
    so local development works without a real subscription; production must
    configure the audience.  Otherwise the token is verified with
    ``google.oauth2.id_token.verify_oauth2_token`` against that audience.

    Returns:
        ``True`` when the token is accepted, ``False`` on any failure
        (including the Google auth libraries being unavailable).
    """
    if not token:
        return False
    audience = settings.GMAIL_PUBSUB_AUDIENCE
    if not audience:
        logger.warning(
            "GMAIL_PUBSUB_AUDIENCE not set — skipping Pub/Sub JWT verification (dev mode only)"
        )
        return True
    try:
        # Imported lazily inside the guarded region so an import failure is
        # treated like any other verification failure.
        from google.auth.transport import requests as g_requests  # noqa: PLC0415
        from google.oauth2 import id_token  # noqa: PLC0415
        id_token.verify_oauth2_token(token, g_requests.Request(), audience=audience)
    except Exception:
        logger.warning("pubsub jwt verification failed", exc_info=True)
        return False
    return True
@router.post("/gmail", status_code=status.HTTP_204_NO_CONTENT)
async def gmail_pubsub(
    request: Request,
    authorization: str = Header(default=""),
) -> None:
    """Receive a Gmail Pub/Sub push notification.

    Verifies the OIDC JWT, decodes the Pub/Sub envelope, resolves the user by
    the ``emailAddress`` in the payload, and calls
    ``ScoutEngine.trigger_scout`` for each enabled Gmail scout of that user.

    Malformed input (non-JSON body, non-object envelope or payload, missing
    data or email, unknown user) is acknowledged with 204 and no action:
    Pub/Sub redelivers on any error status, and a malformed message would
    fail identically on every retry.

    Raises:
        HTTPException: 401 when JWT verification fails.
    """
    token = authorization.removeprefix("Bearer ").strip()
    if not _verify_pubsub_jwt(token):
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid Pub/Sub JWT")
    try:
        body = await request.json()
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        logger.warning("pubsub request body is not valid JSON")
        return
    if not isinstance(body, dict):
        logger.warning("pubsub envelope is not a JSON object")
        return
    msg = body.get("message") or {}
    raw = msg.get("data") if isinstance(msg, dict) else None
    if not raw:
        return  # ack without action — empty message data
    try:
        decoded = json.loads(base64.b64decode(raw).decode())
    except (ValueError, TypeError):
        # binascii.Error, UnicodeDecodeError and JSONDecodeError are ValueError
        # subclasses; TypeError covers a non-string ``data`` field.
        logger.warning("pubsub payload decode failed")
        return
    if not isinstance(decoded, dict):
        logger.warning("pubsub payload is not a JSON object")
        return
    email = decoded.get("emailAddress")
    if not email:
        return
    async with async_session() as session:
        user_q = await session.execute(select(User).where(User.email == email))
        user = user_q.scalar_one_or_none()
        if user is None:
            logger.info("pubsub: no user for %s — ignoring", email)
            return
        scouts_q = await session.execute(
            select(CloudScoutConfig).where(
                CloudScoutConfig.user_id == user.id,
                CloudScoutConfig.provider == "gmail",
                CloudScoutConfig.enabled == True,  # noqa: E712
            )
        )
        scouts = scouts_q.scalars().all()
    # The scouts are triggered after the lookup session has been closed.
    engine = ScoutEngine()
    for scout in scouts:
        await engine.trigger_scout(uuid.UUID(str(scout.id)))
--- a/app/api/routes/scouts.py
+++ b/app/api/routes/scouts.py
@@ -0,0 +1,440 @@
 """Scout routes.
 Backend responsibilities are intentionally minimal:
    GET  /scouts/catalog         — static catalog for UI display
    POST /scouts/can-create      — billing eligibility check
    POST /scouts/trigger         — trigger a local scout run
    POST /scouts/notes/summarize — generate an AI summary for a note
 Scout configuration is owned by the Electron app and is not persisted
 in backend scout-config tables.
 Gmail OAuth setup (scout-specific consent):
    GET  /scouts/oauth/gmail/authorize       — returns consent-screen URL
    GET  /scouts/oauth/gmail/web-callback    — bounces to deep link (excluded from schema)
    POST /scouts/oauth/gmail/callback        — exchanges code, stores encrypted token
 """
 from __future__ import annotations
 import asyncio
 import logging
 import secrets
 import time
 import urllib.parse
 import uuid
 from datetime import datetime, timezone
 from fastapi import APIRouter, Depends, HTTPException, status
 from fastapi.responses import RedirectResponse
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from pydantic import BaseModel
 from app.api.deps import get_current_user
 from app.auth.oauth_providers import generate_pkce_pair
 from app.billing.tier_manager import FEATURES
 from app.config.settings import settings
 from app.core.scout_runner import is_agent_running, run_local_agent
 from app.core.device_manager import device_manager
 from app.core.note_summarizer import generate_note_summary
 from app.db import get_session
 from app.integrations import encrypt_token
 from app.models import CloudScoutConfig, ScoutRunLog, LocalScoutConfig
 from app.schemas import (
    ScoutCatalogItem,
    ScoutCreationCheckRequest,
    ScoutCreationCheckResponse,
    ScoutRunLogResponse,
    ScoutTriggerRequest,
    UserProfile,
 )
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Routes declared on this router are served under the /scouts prefix.
 router = APIRouter(prefix="/scouts", tags=["scouts"])
 # ── Datetime helpers ──────────────────────────────────────────────────
 def _dt_ms(dt: datetime) -> int:
    return int(dt.timestamp() * 1000)
 def _dt_ms_opt(dt: datetime | None) -> int | None:
    return int(dt.timestamp() * 1000) if dt else None
 def _to_data_types(values: list[str]) -> list[str]:
    normalize = {
        "task": "tasks",           "tasks": "tasks",
        "note": "notes",           "notes": "notes",
        "timeline": "timelines",   "timelines": "timelines",   "timelineEvents": "timelines",
        "project": "projects",     "projects": "projects",
    }
    seen: set[str] = set()
    result: list[str] = []
    for v in values:
        mapped = normalize.get(v)
        if mapped and mapped not in seen:
            seen.add(mapped)
            result.append(mapped)
    return result
 def _to_run_log_response(log: ScoutRunLog) -> ScoutRunLogResponse:
    """Build the API representation of a run-log row.

    The row's ``scout_id`` / ``scout_type`` are exposed as ``agent_id`` /
    ``agent_type``, timestamps are converted to epoch milliseconds, and a
    falsy ``errors`` value is reported as an empty list.
    """
    started_ms = _dt_ms(log.started_at)
    completed_ms = _dt_ms_opt(log.completed_at)
    error_list = log.errors or []
    return ScoutRunLogResponse(
        id=log.id,
        agent_id=log.scout_id,
        agent_type=log.scout_type,  # type: ignore[arg-type]
        status=log.status,  # type: ignore[arg-type]
        items_processed=log.items_processed,
        items_created=log.items_created,
        errors=error_list,
        started_at=started_ms,
        completed_at=completed_ms,
    )
 def _enforce_agent_limit(tier: str, current_count: int) -> int:
    """Return the tier's ``batch_active`` limit, rejecting counts at or over it.

    Unknown tiers fall back to the ``"free"`` entry of ``FEATURES``; a limit of
    ``-1`` means unlimited.

    Raises:
        HTTPException: 403 when the limit is finite and ``current_count`` has
            reached it.
    """
    tier_features = FEATURES.get(tier, FEATURES["free"])
    limit: int = tier_features["batch_active"]
    if limit == -1 or current_count < limit:
        return limit
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail=f"Agent limit ({limit}) reached for your tier. Upgrade to create more.",
    )
 async def _enforce_run_frequency(
    tier: str,
    user_id: str,
    db: AsyncSession,
 ) -> None:
    """Reject the request when the user's daily batch-run quota is used up.

    The quota is the tier's ``batch_runs_per_day`` feature (unknown tiers use
    the ``"free"`` entry; ``-1`` means unlimited).  Runs are counted as
    ``ScoutRunLog`` rows of the user started since midnight UTC today.

    Raises:
        HTTPException: 402 when today's run count has reached the quota.
    """
    limit: int = FEATURES.get(tier, FEATURES["free"])["batch_runs_per_day"]
    if limit == -1:
        return  # unlimited
    midnight_utc = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    count_stmt = select(func.count(ScoutRunLog.id)).where(
        ScoutRunLog.user_id == user_id,
        ScoutRunLog.started_at >= midnight_utc,
    )
    runs_today: int = (await db.execute(count_stmt)).scalar_one()
    if runs_today < limit:
        return
    raise HTTPException(
        status_code=status.HTTP_402_PAYMENT_REQUIRED,
        detail=f"Daily batch run limit ({limit}) reached for your tier. Upgrade for more runs.",
    )
 # ── Catalog ───────────────────────────────────────────────────────────
@router.get("/catalog", response_model=list[ScoutCatalogItem])
async def get_agent_catalog(
    current_user: UserProfile = Depends(get_current_user),
) -> list[ScoutCatalogItem]:
    """Return the fixed catalog of agent types shown in the UI.

    The list is static; ``current_user`` is declared only so the route
    depends on ``get_current_user``.
    """
    # (type, name, description) rows, in display order.
    catalog_rows = (
        (
            "local_directory",
            "Local Directory Monitor",
            "Watches local directories, extracts data from files using AI",
        ),
        (
            "gmail",
            "Gmail Connector",
            "Scans Gmail inbox, extracts tasks/notes from emails",
        ),
        (
            "teams",
            "Microsoft Teams Connector",
            "Monitors Teams messages, extracts action items",
        ),
        (
            "outlook",
            "Outlook Connector",
            "Scans Outlook inbox, extracts tasks/notes",
        ),
    )
    return [
        ScoutCatalogItem(type=kind, name=label, description=blurb)
        for kind, label, blurb in catalog_rows
    ]
@router.post("/can-create", response_model=ScoutCreationCheckResponse)
async def can_create_agent(
    body: ScoutCreationCheckRequest,
    current_user: UserProfile = Depends(get_current_user),
) -> ScoutCreationCheckResponse:
    """Report whether the user's tier allows one more active agent.

    Configuration is client-owned, so the Electron app supplies its current
    active-agent count in the request body and the backend only compares it
    with the tier's ``batch_active`` limit (``-1`` means unlimited; unknown
    tiers use the ``"free"`` entry).
    """
    tier = current_user.tier
    active = body.active_agents
    limit: int = FEATURES.get(tier, FEATURES["free"])["batch_active"]
    if limit == -1:
        allowed = True
    else:
        allowed = active < limit
    return ScoutCreationCheckResponse(
        allowed=allowed,
        tier=tier,
        active_agents=active,
        limit=limit,
    )
# Strong references to in-flight background runs.  The event loop keeps only
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_run_tasks: set[asyncio.Task] = set()


@router.post("/trigger", response_model=ScoutRunLogResponse, status_code=status.HTTP_202_ACCEPTED)
async def trigger_agent_run(
    body: ScoutTriggerRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
) -> ScoutRunLogResponse:
    """Trigger a local agent run using client-provided configuration.

    Enforces the tier's active-agent and daily-run limits, builds an
    in-memory ``LocalScoutConfig`` from the request, records a ``running``
    ``ScoutRunLog`` row and starts ``run_local_agent`` as a background task.
    The response (HTTP 202) describes the freshly created run log.

    Raises:
        HTTPException: 403 / 402 from the limit checks, or 409 when a run for
            the same agent id is already in progress.
    """
    _enforce_agent_limit(current_user.tier, body.active_agents)
    await _enforce_run_frequency(current_user.tier, current_user.id, db)
    # ``last_run_at`` arrives as epoch milliseconds; a falsy value means "never".
    last_run_dt = (
        datetime.fromtimestamp(body.last_run_at / 1000, tz=timezone.utc)
        if body.last_run_at
        else None
    )
    config = LocalScoutConfig(
        id=str(uuid.uuid4()),
        user_id=current_user.id,
        device_id=body.device_id,
        name="Local Directory Monitor",
        directory_paths=[body.directory],
        data_types=_to_data_types(body.what_to_extract),
        prompt_template=body.custom_agent_prompt or "",
        scout_config=body.agent_config,
        file_extensions=[],
        schedule_cron=body.batch_interval,
        enabled=True,
        last_run_at=last_run_dt,
    )
    # Use the FE's stable agent_id if provided, fall back to the ephemeral config id.
    stable_agent_id = body.agent_id or config.id
    if is_agent_running(stable_agent_id):
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Agent is already running. Only one run per agent is allowed at a time.",
        )
    run_log = ScoutRunLog(
        scout_id=stable_agent_id,
        scout_type="local",
        user_id=current_user.id,
        status="running",
    )
    db.add(run_log)
    await db.commit()
    await db.refresh(run_log)
    run_context = {
        "type": "agent_batch",
        "run_id": run_log.id,
        "agent_id": stable_agent_id,
    }
    run_task = asyncio.create_task(
        run_local_agent(current_user.id, config, run_log, device_manager, run_context)
    )
    # Hold a reference until the run completes, then drop it automatically.
    _background_run_tasks.add(run_task)
    run_task.add_done_callback(_background_run_tasks.discard)
    return _to_run_log_response(run_log)
 # ── Note summary endpoint ──────────────────────────────────────────────────────
 # Request body of POST /scouts/notes/summarize.
 class NoteSummarizeRequest(BaseModel):
    # Note title; passed as the first argument to generate_note_summary.
    title: str
    # Note body; passed as the second argument to generate_note_summary.
    content: str
 # Response body of POST /scouts/notes/summarize.
 class NoteSummarizeResponse(BaseModel):
    # The value returned by generate_note_summary for the submitted note.
    summary: str
@router.post("/notes/summarize", response_model=NoteSummarizeResponse)
async def summarize_note(
    body: NoteSummarizeRequest,
    current_user: UserProfile = Depends(get_current_user),
) -> NoteSummarizeResponse:
    """Produce an AI summary of the submitted note.

    Used by the Electron backfill on startup.  ``current_user`` is declared
    only so the route depends on ``get_current_user``.
    """
    note_title, note_body = body.title, body.content
    return NoteSummarizeResponse(
        summary=await generate_note_summary(note_title, note_body),
    )
 # ── Gmail OAuth setup (scout-specific) ───────────────────────────────────────
 # OAuth scopes requested on the Gmail scout consent screen (joined with
 # spaces into the ``scope`` query parameter of the authorize URL).
 _GMAIL_SCOUT_SCOPES = [
    "openid",
    "email",
    "https://www.googleapis.com/auth/gmail.readonly",
    "https://www.googleapis.com/auth/gmail.modify",
 ]
 # Google OAuth endpoints: consent screen and authorization-code exchange.
 _GOOGLE_AUTH_URL = "https://accounts.google.com/o/oauth2/v2/auth"
 _GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
 # In-memory pending OAuth states for scout Gmail consent:
 # state → (code_verifier, scout_id, user_id, expires_at_epoch_s)
 # Entries are added by the authorize route and consumed by the POST callback.
 # Being process-local, a state is only visible to the process that created it.
 # Production note: replace with Redis for multi-process deployments.
 _pending_scout_oauth_states: dict[str, tuple[str, str, str, float]] = {}
 # Lifetime of a pending state, in seconds, counted from its creation.
 _SCOUT_OAUTH_TTL_SECONDS = 600  # 10 minutes
 def _scout_gmail_redirect_uri() -> str:
    """Build the scout Gmail web-callback URI on the login redirect's origin.

    ``settings.OAUTH_REDIRECT_URI`` is the full login OAuth redirect
    (e.g. http://localhost:8000/api/v1/auth/oauth/google/web-callback).  Only
    its scheme and network location are kept; the scout callback path is
    appended to that origin.
    """
    login_redirect = urllib.parse.urlparse(settings.OAUTH_REDIRECT_URI)
    origin = login_redirect.scheme + "://" + login_redirect.netloc
    return origin + "/api/v1/scouts/oauth/gmail/web-callback"
 # Response model of GET /scouts/oauth/gmail/authorize.
 class _ScoutGmailAuthorizeResponse(BaseModel):
    # Google consent-screen URL, including the state and PKCE challenge.
    authorize_url: str
 # Request body of POST /scouts/oauth/gmail/callback.
 class _ScoutGmailCallbackBody(BaseModel):
    # Authorization code to exchange at the Google token endpoint.
    code: str
    # Opaque state; must match an entry in _pending_scout_oauth_states.
    state: str
@router.get("/oauth/gmail/authorize", response_model=_ScoutGmailAuthorizeResponse)
async def scout_gmail_oauth_authorize(
    scout_id: str,
    current_user: UserProfile = Depends(get_current_user),
) -> _ScoutGmailAuthorizeResponse:
    """Begin the Gmail OAuth flow for one cloud scout.

    Generates a PKCE pair and a random state, remembers them (together with
    the scout and user ids and an expiry) in the in-memory pending-state map,
    and returns the Google consent-screen URL.  The client opens this URL in
    the system browser; after consent Google redirects to web-callback, which
    bounces to the ``adiuvai://scout/oauth/gmail/callback`` deep link.

    Raises:
        HTTPException: 503 when the Google client id or secret is not set.
    """
    if not (settings.GOOGLE_AUTH_CLIENT_ID and settings.GOOGLE_AUTH_CLIENT_SECRET):
        raise HTTPException(
            status.HTTP_503_SERVICE_UNAVAILABLE,
            "Google OAuth is not configured on this server",
        )
    code_verifier, code_challenge = generate_pkce_pair()
    state = secrets.token_urlsafe(32)
    now = time.time()
    # Drop expired entries first so the map cannot grow without bound.  The
    # keys are collected up front because the dict is mutated in the loop.
    stale_states = [
        key for key, entry in _pending_scout_oauth_states.items() if entry[3] < now
    ]
    for key in stale_states:
        del _pending_scout_oauth_states[key]
    _pending_scout_oauth_states[state] = (
        code_verifier,
        scout_id,
        current_user.id,
        now + _SCOUT_OAUTH_TTL_SECONDS,
    )
    query = urllib.parse.urlencode(
        {
            "client_id": settings.GOOGLE_AUTH_CLIENT_ID,
            "redirect_uri": _scout_gmail_redirect_uri(),
            "response_type": "code",
            "scope": " ".join(_GMAIL_SCOUT_SCOPES),
            "state": state,
            "code_challenge": code_challenge,
            "code_challenge_method": "S256",
            "access_type": "offline",
            "prompt": "consent",
        }
    )
    return _ScoutGmailAuthorizeResponse(authorize_url=f"{_GOOGLE_AUTH_URL}?{query}")
@router.get("/oauth/gmail/web-callback", include_in_schema=False)
async def scout_gmail_oauth_web_callback(code: str, state: str) -> RedirectResponse:
    """Landing page for Google's redirect after Gmail consent.

    Performs no processing itself: it forwards ``code`` and ``state`` to the
    Electron deep link with a 302 redirect so the desktop app receives them.
    """
    query = urllib.parse.urlencode({"code": code, "state": state})
    return RedirectResponse(
        url="adiuvai://scout/oauth/gmail/callback?" + query,
        status_code=302,
    )
@router.post("/oauth/gmail/callback")
async def scout_gmail_oauth_callback(
    body: _ScoutGmailCallbackBody,
    db: AsyncSession = Depends(get_session),
    current_user: UserProfile = Depends(get_current_user),
) -> dict:
    """Exchange the Gmail authorization code and store the encrypted token on the scout.

    Called by the Electron app after it receives the deep-link callback with
    the ``code`` and ``state`` params.  The state is consumed (single use) and
    must be unexpired and issued to the calling user.  After the token is
    stored, a Gmail push watch is set up on a best-effort basis: failures
    there are logged and do not fail the request.

    Returns:
        ``{"ok": True}`` once the token has been stored.

    Raises:
        HTTPException: 401 for an unknown, expired or foreign state; 502 when
            the token endpoint cannot be reached, rejects the code, or returns
            a response without an access token; 404 when the scout does not
            exist or belongs to another user.
    """
    import httpx

    entry = _pending_scout_oauth_states.pop(body.state, None)
    if entry is None or entry[3] < time.time() or entry[2] != current_user.id:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired OAuth state")
    code_verifier, scout_id, _, _ = entry
    redirect_uri = _scout_gmail_redirect_uri()
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                _GOOGLE_TOKEN_URL,
                data={
                    "client_id": settings.GOOGLE_AUTH_CLIENT_ID,
                    "client_secret": settings.GOOGLE_AUTH_CLIENT_SECRET,
                    "code": body.code,
                    "code_verifier": code_verifier,
                    "grant_type": "authorization_code",
                    "redirect_uri": redirect_uri,
                },
            )
        response.raise_for_status()
    except httpx.HTTPStatusError as exc:
        logger.error("Gmail token exchange failed: %s", exc.response.text)
        raise HTTPException(
            status.HTTP_502_BAD_GATEWAY, "Failed to exchange Gmail authorization code"
        ) from exc
    except httpx.RequestError as exc:
        # Transport-level failure (DNS, connect, timeout): an upstream
        # problem, so report it as a bad gateway rather than a server error.
        logger.error("Gmail token exchange request failed: %s", exc)
        raise HTTPException(
            status.HTTP_502_BAD_GATEWAY, "Failed to exchange Gmail authorization code"
        ) from exc
    try:
        token_data = response.json()
        access_token = token_data["access_token"]
    except (ValueError, KeyError, TypeError) as exc:
        # A 2xx response that is not JSON or lacks an access token is just as
        # unusable as an error status.
        logger.error("Gmail token response did not contain an access token")
        raise HTTPException(
            status.HTTP_502_BAD_GATEWAY, "Failed to exchange Gmail authorization code"
        ) from exc
    creds_dict: dict = {
        "token": access_token,
        # No refresh token in the response is stored as None.
        "refresh_token": token_data.get("refresh_token"),
        "token_uri": _GOOGLE_TOKEN_URL,
        "client_id": settings.GOOGLE_AUTH_CLIENT_ID,
        "client_secret": settings.GOOGLE_AUTH_CLIENT_SECRET,
        "scopes": [
            "https://www.googleapis.com/auth/gmail.readonly",
            "https://www.googleapis.com/auth/gmail.modify",
        ],
    }
    encrypted = encrypt_token(creds_dict)
    scout = await db.get(CloudScoutConfig, scout_id)
    if scout is None or scout.user_id != current_user.id:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Scout not found")
    scout.oauth_token_encrypted = encrypted
    await db.commit()
    # Attempt to set up Gmail push watch so we start receiving Pub/Sub notifications.
    from app.scouts.connectors.registry import get_connector
    try:
        connector = get_connector("gmail")
        await connector.setup_watch(scout)
        await db.commit()
    except KeyError:
        logger.warning("gmail connector not registered — skipping setup_watch for scout %s", scout_id)
    except Exception:
        # Best effort: the token is already stored, so a failed watch setup
        # must not fail the request.
        logger.exception("setup_watch failed for scout %s", scout_id)
    return {"ok": True}
--- a/app/auth/init.py
+++ b/app/auth/init.py
@@ -0,0 +1 @@
 "OAuth provider abstractions and utilities."
--- a/app/auth/oauth_providers.py
+++ b/app/auth/oauth_providers.py
@@ -0,0 +1,135 @@
 """OAuth 2.0 + PKCE provider abstractions.
 Each provider implements a three-step flow designed for a desktop (public) client:
  1. get_authorization_url(state, code_challenge) → str
       Build the provider's consent-screen URL.  State and code_challenge are
       generated server-side; the client opens this URL in the system browser.
  2. exchange_code(code, code_verifier, redirect_uri) → dict
       Exchange the short-lived authorization code for an access token.
       The code_verifier proves ownership of the PKCE challenge.
  3. get_userinfo(access_token) → OAuthUserInfo
       Fetch the canonical user identity from the provider.
 Currently supported providers:
  - GoogleOAuthProvider  (scope: openid email profile)
 Adding a new provider:
  - Implement the three methods above.
  - Register in _PROVIDERS inside routes/auth.py.
 """
 from __future__ import annotations
 import base64
 import hashlib
 import os
 import urllib.parse
 from dataclasses import dataclass
 import httpx
 # ── Data transfer objects ─────────────────────────────────────────────
@dataclass
 class OAuthUserInfo:
    """Normalized user identity returned by any provider.
    Built by a provider's ``get_userinfo`` call; for Google the fields are
    filled from the userinfo response keys noted on each attribute.
    """
    # Provider-side user identifier (Google: the required ``sub`` key).
    provider_user_id: str
    # Account e-mail address (Google: the required ``email`` key).
    email: str
    # Whether the provider reports the address as verified
    # (Google: ``email_verified``, treated as False when absent).
    email_verified: bool
    # Profile picture URL, or None when not supplied (Google: ``picture``).
    avatar_url: str | None
    # Display name, or None when not supplied (Google: ``name``).
    name: str | None
 # ── PKCE helpers ──────────────────────────────────────────────────────
 def generate_pkce_pair() -> tuple[str, str]:
    """Create a fresh PKCE ``(code_verifier, code_challenge)`` pair (method S256).
    The verifier is 32 bytes from ``os.urandom`` rendered as unpadded URL-safe
    base64 (43 characters).  The challenge is the SHA-256 digest of the
    verifier text, likewise rendered as unpadded URL-safe base64.
    """
    def _b64url(raw: bytes) -> str:
        # base64url with the trailing "=" padding stripped.
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()
    verifier = _b64url(os.urandom(32))
    challenge = _b64url(hashlib.sha256(verifier.encode()).digest())
    return verifier, challenge
 # ── Google provider ───────────────────────────────────────────────────
 class GoogleOAuthProvider:
    """OAuth 2.0 client for Google sign-in (scope ``openid email profile``).
    Talks to Google's authorization, token and userinfo endpoints directly
    over ``httpx`` using PKCE S256.  google-auth-oauthlib is deliberately not
    used so the flow stays generic and async.
    """
    name = "google"
    _AUTH_URL = "https://accounts.google.com/o/oauth2/v2/auth"
    _TOKEN_URL = "https://oauth2.googleapis.com/token"
    _USERINFO_URL = "https://www.googleapis.com/oauth2/v3/userinfo"
    def __init__(self, client_id: str, client_secret: str, redirect_uri: str) -> None:
        """Store the OAuth client credentials and the default redirect URI."""
        self.client_id, self.client_secret = client_id, client_secret
        self.redirect_uri = redirect_uri
    def get_authorization_url(self, state: str, code_challenge: str) -> str:
        """Return the consent-screen URL for the given state and PKCE challenge.
        The redirect URI embedded in the URL is the one passed to ``__init__``.
        """
        query = urllib.parse.urlencode(
            {
                "client_id": self.client_id,
                "redirect_uri": self.redirect_uri,
                "response_type": "code",
                "scope": "openid email profile",
                "state": state,
                "code_challenge": code_challenge,
                "code_challenge_method": "S256",
                "access_type": "offline",
                "prompt": "select_account",
            }
        )
        return self._AUTH_URL + "?" + query
    async def exchange_code(
        self, code: str, code_verifier: str, redirect_uri: str
    ) -> dict:
        """POST the authorization code to the token endpoint and return its JSON.
        ``redirect_uri`` is sent as given by the caller (not ``self.redirect_uri``).
        Raises ``httpx.HTTPStatusError`` on a 4xx/5xx response.
        """
        form = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "code": code,
            "code_verifier": code_verifier,
            "grant_type": "authorization_code",
            "redirect_uri": redirect_uri,
        }
        async with httpx.AsyncClient() as http:
            token_response = await http.post(self._TOKEN_URL, data=form)
        token_response.raise_for_status()
        return token_response.json()
    async def get_userinfo(self, access_token: str) -> OAuthUserInfo:
        """Fetch the signed-in user's profile and normalise it to ``OAuthUserInfo``.
        Raises ``httpx.HTTPStatusError`` on a 4xx/5xx response and ``KeyError``
        when the payload lacks ``sub`` or ``email``.
        """
        bearer = {"Authorization": f"Bearer {access_token}"}
        async with httpx.AsyncClient() as http:
            reply = await http.get(self._USERINFO_URL, headers=bearer)
        reply.raise_for_status()
        profile = reply.json()
        return OAuthUserInfo(
            provider_user_id=profile["sub"],
            email=profile["email"],
            # Optional keys: a missing flag counts as unverified.
            email_verified=profile.get("email_verified", False),
            avatar_url=profile.get("picture"),
            name=profile.get("name"),
        )
--- a/app/billing/__init__.py
+++ b/app/billing/__init__.py
@@ -0,0 +1,4 @@
 from app.billing.stripe_service import stripe_service
 from app.billing.tier_manager import tier_manager
 __all__ = ["stripe_service", "tier_manager"]
--- a/app/billing/quota.py
+++ b/app/billing/quota.py
@@ -0,0 +1,139 @@
 """Quota checks and atomic token-usage accounting for folder integration."""
 from __future__ import annotations
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from sqlalchemy import select, update
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.billing.tier_manager import TierManager
 from app.models import MonthlyTokenUsage
 from app.schemas import BillingTier
 class QuotaExceeded(Exception):
    """Signals that a folder operation is not allowed under the user's tier.
    ``reason`` is a machine-readable code; ``message`` becomes the exception text.
    """
    def __init__(self, reason: str, message: str) -> None:
        # Which limit was hit: "max_files" | "monthly_tokens".
        self.reason = reason
        super().__init__(message)
@dataclass
 class TokenUsageResult:
    # Outcome of ``add_token_usage``.
    # Running total for the (user, month, feature) row after the addition.
    tokens_used: int
    # True when a cap was supplied (not None and not -1) and the total is >= cap.
    exhausted: bool
 def _current_year_month() -> str:
    return datetime.now(timezone.utc).strftime("%Y-%m")
 _tier_manager = TierManager()
 async def check_folder_quota(
    *,
    user_id: str,
    tier: BillingTier,
    estimated_files: int,
    db: AsyncSession,
 ) -> None:
    """Validate a folder operation against the tier's file and token limits.
    Raises ``QuotaExceeded("max_files", …)`` when ``estimated_files`` is above
    the tier's ``folder_max_files``, and ``QuotaExceeded("monthly_tokens", …)``
    when this month's recorded ``folder_index`` usage has already reached
    ``folder_monthly_tokens``.  A limit of -1 disables the corresponding check.
    """
    file_limit = _tier_manager.get_feature_value(tier, "folder_max_files")
    files_unlimited = file_limit == -1
    if not files_unlimited and estimated_files > file_limit:
        raise QuotaExceeded(
            "max_files",
            f"Folder has {estimated_files} files; tier '{tier}' allows max {file_limit}.",
        )
    token_cap = _tier_manager.get_feature_value(tier, "folder_monthly_tokens")
    if token_cap != -1:
        # Only capped tiers need the usage lookup.
        month = _current_year_month()
        usage_query = select(MonthlyTokenUsage).where(
            MonthlyTokenUsage.user_id == user_id,
            MonthlyTokenUsage.year_month == month,
            MonthlyTokenUsage.feature == "folder_index",
        )
        usage_row = (await db.execute(usage_query)).scalar_one_or_none()
        if usage_row:
            used = usage_row.tokens_used
        else:
            # No row yet this month means nothing has been spent.
            used = 0
        if used >= token_cap:
            raise QuotaExceeded(
                "monthly_tokens",
                f"Monthly token budget exhausted ({used}/{token_cap}); resets next month.",
            )
 async def add_token_usage(
    *,
    user_id: str,
    feature: str,
    tokens: int,
    db: AsyncSession,
    cap: int | None = None,
 ) -> TokenUsageResult:
    """Atomically add `tokens` to MonthlyTokenUsage row for (user, current month, feature).
    Uses PostgreSQL ``INSERT … ON CONFLICT DO UPDATE`` when available; falls
    back to a read-then-write on other engines (e.g. aiosqlite in tests).
    Returns post-update total and whether cap is exhausted.
    Args:
        user_id: Owner of the usage row.
        feature: Usage bucket stored in the row's ``feature`` column.
        tokens: Amount added to the running ``tokens_used`` total.
        db: Async session; this function commits on it before returning.
        cap: Optional token budget.  ``None`` or ``-1`` means no cap, so
            ``exhausted`` is always ``False``.
    Returns:
        ``TokenUsageResult`` whose ``exhausted`` flag is True when the
        post-update total is at or above ``cap``.
    """
    ym = _current_year_month()
    # Detect dialect to choose between native upsert and portable fallback.
    # An unbound session yields "" and therefore takes the fallback path.
    dialect_name: str = db.bind.dialect.name if db.bind is not None else ""  # type: ignore[union-attr]
    if dialect_name == "postgresql":
        # Native atomic upsert — production path.
        # NOTE(review): ON CONFLICT presumably relies on a unique constraint over
        # (user_id, year_month, feature) — confirm against the model definition.
        stmt = (
            pg_insert(MonthlyTokenUsage)
            .values(
                user_id=user_id,
                year_month=ym,
                feature=feature,
                tokens_used=tokens,
            )
            .on_conflict_do_update(
                index_elements=["user_id", "year_month", "feature"],
                # On conflict, increment the stored total instead of overwriting it.
                set_={"tokens_used": MonthlyTokenUsage.tokens_used + tokens},
            )
            # RETURNING hands back the post-update total in the same statement.
            .returning(MonthlyTokenUsage.tokens_used)
        )
        used: int = (await db.execute(stmt)).scalar_one()
        await db.commit()
    else:
        # Portable fallback — used in tests (SQLite) and any non-PG engine.
        # This read-then-write is not atomic across concurrent callers.
        row = (
            await db.execute(
                select(MonthlyTokenUsage).where(
                    MonthlyTokenUsage.user_id == user_id,
                    MonthlyTokenUsage.year_month == ym,
                    MonthlyTokenUsage.feature == feature,
                )
            )
        ).scalar_one_or_none()
        if row is None:
            # First usage this month for this (user, feature): create the row.
            row = MonthlyTokenUsage(
                user_id=user_id,
                year_month=ym,
                feature=feature,
                tokens_used=tokens,
            )
            db.add(row)
        else:
            row.tokens_used += tokens
        await db.commit()
        # Reload so ``tokens_used`` reflects the committed value.
        await db.refresh(row)
        used = row.tokens_used
    # A cap of None or -1 means "unlimited" and never reports exhaustion.
    exhausted = cap is not None and cap != -1 and used >= cap
    return TokenUsageResult(tokens_used=used, exhausted=exhausted)
--- a/services/billing/app/stripe_service.py
+++ b/services/billing/app/stripe_service.py
@@ -1,7 +1,7 @@
 """Stripe service: checkout sessions, webhook handling, subscription management.
-Adapted for the Billing microservice — uses shared.models and shared.db.
+Subscription records are persisted in the PostgreSQL ``subscriptions`` table.
-All Stripe calls are gracefully stubbed when STRIPE_SECRET_KEY is not
+All Stripe calls are gracefully stubbed when ``STRIPE_SECRET_KEY`` is not
 configured, enabling local development without live credentials.
 """
@@ -15,8 +15,7 @@ from fastapi import HTTPException, status
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from shared.config import settings
+from app.config.settings import settings
 from shared.models import Subscription
 # Stripe price IDs per tier — replace with real IDs in production .env
 TIER_PRICE_IDS: dict[str, str] = {
@@ -44,10 +43,14 @@ class StripeService:
        self,
        user_id: str,
        tier: str,
-        success_url: str = "https://app.adiuva.app/billing/success?session_id={CHECKOUT_SESSION_ID}",
+        success_url: str = "https://app.adiuvai.app/billing/success?session_id={CHECKOUT_SESSION_ID}",
-        cancel_url: str = "https://app.adiuva.app/billing/cancel",
+        cancel_url: str = "https://app.adiuvai.app/billing/cancel",
    ) -> str:
-        """Create a Stripe checkout session and return the URL."""
+        """Create a Stripe checkout session and return the URL.
        Returns a stub URL when Stripe is not configured.
        Raises ``HTTP 400`` for the free tier or an unknown tier.
        """
        if tier == "free":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
@@ -84,6 +87,8 @@ class StripeService:
        """Process a Stripe webhook event.
        Verifies the signature, then dispatches on event type.
        Raises ``HTTP 400`` on signature mismatch.
        No-ops when Stripe is not configured.
        """
        if not self._configured():
            return
@@ -150,7 +155,9 @@ class StripeService:
    async def get_subscription(
        self, user_id: str, db: AsyncSession
    ) -> dict[str, Any] | None:
-        """Return the subscription record for user_id, or None."""
+        """Return the subscription record for ``user_id``, or ``None`` if absent."""
        from app.models import Subscription  # noqa: PLC0415
        result = await db.execute(
            select(Subscription).where(Subscription.user_id == user_id)
        )
@@ -169,7 +176,12 @@ class StripeService:
        }
    async def cancel_subscription(self, user_id: str, db: AsyncSession) -> None:
-        """Cancel the user's Stripe subscription and downgrade to free."""
+        """Cancel the user's Stripe subscription and downgrade them to free.
        Raises ``HTTP 404`` when no active subscription exists.
        """
        from app.models import Subscription  # noqa: PLC0415
        result = await db.execute(
            select(Subscription).where(Subscription.user_id == user_id)
        )
@@ -188,6 +200,45 @@ class StripeService:
        sub.status = "canceled"
        await db.commit()
    async def list_invoices(
        self, user_id: str, db: AsyncSession, limit: int = 24
    ) -> list[dict[str, Any]]:
        """Return up to ``limit`` recent invoices for the user from Stripe.
        Returns an empty list when Stripe is not configured, ``limit`` is not
        positive, the user has no ``stripe_customer_id``, or the Stripe call
        fails (the failure is logged; this method never raises for it).
        Each entry carries ``id``, ``amount_due``, ``amount_paid``, ``currency``,
        ``status``, ``created`` (epoch milliseconds), ``invoice_url`` and
        ``invoice_pdf``.
        """
        import logging  # noqa: PLC0415
        from itertools import islice  # noqa: PLC0415
        if limit <= 0 or not self._configured():
            return []
        from app.models import User  # noqa: PLC0415
        result = await db.execute(
            select(User.stripe_customer_id).where(User.id == user_id)
        )
        customer_id = result.scalar_one_or_none()
        if not customer_id:
            return []
        try:
            s = self._client()
            # Stripe's ``limit`` is only a page size (max 100); auto-paging would
            # otherwise walk every page, so the iterator is capped explicitly.
            invoices = s.Invoice.list(customer=customer_id, limit=min(limit, 100))
            return [
                {
                    "id": inv.id,
                    "amount_due": inv.amount_due,
                    "amount_paid": inv.amount_paid,
                    "currency": inv.currency,
                    "status": inv.status,
                    "created": inv.created * 1000,  # epoch ms
                    "invoice_url": inv.hosted_invoice_url,
                    "invoice_pdf": inv.invoice_pdf,
                }
                for inv in islice(invoices.auto_paging_iter(), limit)
            ]
        except Exception:
            # Best-effort endpoint: keep returning [] but leave a trace of the failure.
            logging.getLogger(__name__).exception(
                "list_invoices failed for user %s", user_id
            )
            return []
    # ── Private DB helpers ───────────────────────────────────────────────
    async def _upsert_subscription(
@@ -199,6 +250,8 @@ class StripeService:
        sub_status: str,
        current_period_end: datetime | None,
    ) -> None:
        from app.models import Subscription  # noqa: PLC0415
        result = await db.execute(
            select(Subscription).where(Subscription.user_id == user_id)
        )
@@ -220,6 +273,8 @@ class StripeService:
        status: str | None = None,
        current_period_end: datetime | None = None,
    ) -> None:
        from app.models import Subscription  # noqa: PLC0415
        result = await db.execute(
            select(Subscription).where(
                Subscription.stripe_subscription_id == stripe_subscription_id
@@ -236,5 +291,5 @@ class StripeService:
            sub.current_period_end = current_period_end
-# Module-level singleton
+# Module-level singleton shared across the app.
 stripe_service = StripeService()
--- a/app/billing/tier_manager.py
+++ b/app/billing/tier_manager.py
@@ -0,0 +1,149 @@
 """Tier manager: feature matrix and quota enforcement.
 ``TierManager`` is the single source of truth for what each billing tier
 allows.  ``get_tier`` queries the ``subscriptions`` table for the live tier.
 Quota-enforcement helpers take ``tier`` directly — the caller already has it
 from ``current_user.tier`` (provided by ``get_current_user``).
 """
 from __future__ import annotations
 from typing import Any
 from fastapi import HTTPException, status
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.schemas import BillingTier
 # Feature matrix per tier.  -1 means unlimited; 0 means disabled.
 FEATURES: dict[str, dict[str, Any]] = {
    "free": {
        "agents": 3,
        "batch_active": 2,
        "batch_runs_per_day": 5,
        "providers": 1,
        "batch_builder": False,
        "sso": False,
        "real_embeddings": False,       # keyword fallback only
        "realtime_extraction": False,   # batch queue (Phase 2)
        "relational_memory": False,     # relational tier (Phase 3) — Pro+
        "proactive_mining": False,      # Power+ only (Phase 5)
        "folder_max_files": 200,
        "folder_monthly_tokens": 100_000,
    },
    "pro": {
        "agents": -1,           # unlimited
        "batch_active": 10,
        "batch_runs_per_day": 50,
        "providers": -1,
        "batch_builder": False,
        "sso": False,
        "real_embeddings": True,        # pgvector cosine search
        "realtime_extraction": True,    # fire-and-forget asyncio.create_task
        "relational_memory": True,      # person/project predicates
        "proactive_mining": False,      # Power+ only (Phase 5)
        "folder_max_files": 5000,
        "folder_monthly_tokens": 2_000_000,
    },
    "power": {
        "agents": -1,
        "batch_active": -1,     # unlimited
        "batch_runs_per_day": -1,  # unlimited
        "providers": -1,
        "batch_builder": True,
        "sso": False,
        "real_embeddings": True,
        "realtime_extraction": True,
        "relational_memory": True,      # all predicates incl. custom
        "proactive_mining": True,       # scheduled pattern mining (Phase 5)
        "folder_max_files": -1,         # unlimited
        "folder_monthly_tokens": -1,    # unlimited
    },
    "team": {
        "agents": -1,
        "batch_active": -1,
        "batch_runs_per_day": -1,  # unlimited
        "providers": -1,
        "batch_builder": True,
        "sso": True,
        "real_embeddings": True,
        "realtime_extraction": True,
        "relational_memory": True,      # all predicates incl. custom
        "proactive_mining": True,       # scheduled pattern mining (Phase 5)
        "folder_max_files": -1,         # unlimited
        "folder_monthly_tokens": -1,    # unlimited
    },
 }
 # Requests-per-minute limit per tier.
 RATE_LIMITS: dict[str, int] = {
    "free": 20,
    "pro": 60,
    "power": 120,
    "team": 200,
 }
 class TierManager:
    """Single entry point for tier lookups, feature gating and rate limits."""
    # ── Tier lookup ─────────────────────────────────────────────────────
    async def get_tier(self, user_id: str, db: AsyncSession) -> BillingTier:
        """Look up the billing tier stored for ``user_id``.
        When there is no subscription row, or the stored tier is not a key of
        ``FEATURES``, the result is ``'power'`` in dev and ``'free'`` otherwise.
        """
        from app.models import Subscription  # noqa: PLC0415
        from app.config.settings import settings  # noqa: PLC0415
        query = select(Subscription.tier).where(Subscription.user_id == user_id)
        stored: str | None = (await db.execute(query)).scalar_one_or_none()
        if stored is not None and stored in FEATURES:
            return stored  # type: ignore[return-value]
        if settings.ENV == "dev":
            return "power"
        return "free"
    # ── Feature access ───────────────────────────────────────────────────
    def check_feature(self, tier: BillingTier, feature: str) -> bool:
        """Tell whether ``feature`` is enabled for ``tier``.
        Boolean features are returned as-is; numeric features are enabled for
        any non-zero value (including -1, unlimited).  Unknown features are
        disabled, and unknown tiers are treated as ``free``.
        """
        tier_features = FEATURES.get(tier, FEATURES["free"])
        setting = tier_features.get(feature)
        if setting is None:
            return False
        return setting if isinstance(setting, bool) else setting != 0
    def require_feature(self, tier: BillingTier, feature: str, tier_name: str = "") -> None:
        """Raise ``HTTP 403`` unless ``tier`` has ``feature`` enabled.
        ``tier_name``, when given, is named in the error detail as the
        minimum tier required.
        """
        if self.check_feature(tier, feature):
            return
        if tier_name:
            detail = f"Feature '{feature}' requires {tier_name} tier or above."
        else:
            detail = f"Feature '{feature}' is not available on your current tier."
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
    def get_feature_value(self, tier: BillingTier, feature: str) -> int:
        """Return the numeric setting of ``feature`` for ``tier`` (-1 = unlimited).
        Missing or non-integer settings yield 0.  Booleans are ints in Python,
        so boolean features come back unchanged as ``True``/``False``.
        """
        tier_features = FEATURES.get(tier, FEATURES["free"])
        setting = tier_features.get(feature)
        return setting if isinstance(setting, int) else 0
    # ── Rate limiting ────────────────────────────────────────────────────
    def get_rate_limit(self, tier: BillingTier) -> int:
        """Return the requests-per-minute limit for ``tier`` (``free`` if unknown)."""
        free_limit = RATE_LIMITS["free"]
        return RATE_LIMITS.get(tier, free_limit)
 # Module-level singleton shared across the app.
 tier_manager = TierManager()
--- a/services/chat/app/__init__.py
+++ b/services/chat/app/__init__.py
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -0,0 +1,95 @@
 from typing import Literal
 from pydantic_settings import BaseSettings, SettingsConfigDict
 class Settings(BaseSettings):
    # Application configuration. Each attribute below is a setting with its
    # default; ``model_config`` at the bottom points pydantic-settings at ``.env``.
    # SQLAlchemy connection URL (asyncpg driver).
    DATABASE_URL: str = "postgresql+asyncpg://postgres:postgres@localhost:5432/adiuvai"
    # JWT settings. The JWT_SECRET default is a placeholder and must be
    # overridden in any real deployment.
    JWT_SECRET: str = "change-me-in-production"
    JWT_ALGORITHM: str = "HS256"
    JWT_ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
    JWT_REFRESH_TOKEN_EXPIRE_DAYS: int = 30
    # Stripe credentials. Empty by default.
    STRIPE_SECRET_KEY: str = ""
    STRIPE_WEBHOOK_SECRET: str = ""
    # LLM provider API keys. Empty by default.
    OPENAI_API_KEY: str = ""
    ANTHROPIC_API_KEY: str = ""
    GOOGLE_API_KEY: str = ""
    CEREBRAS_API_KEY: str = ""
    GROQ_API_KEY: str = ""
    DEEPSEEK_API_KEY: str = ""
    # Default chat and embedding model identifiers.
    LLM_MODEL: str = "gpt-4o"
    LLM_EMBED_MODEL: str = "text-embedding-3-small"
    # Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
    LLM_MODEL_CLASSIFIER: str = ""        # classifier (intent routing, future use)
    LLM_MODEL_HOME_AGENT: str = ""        # home-agent (run_single_agent / stream)
    LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
    LLM_MODEL_CLOUD_PROCESSOR: str = ""   # cloud-processor (agent_runner)
    LLM_MODEL_BRIEF_AGENT: str = ""            # brief-agent (home + project text briefs)
    LLM_MODEL_TASK_BRIEF_AGENT: str = ""      # task-brief-agent (per-task deep research)
    LLM_MODEL_SETUP_AGENT: str = ""           # agent-setup journey
    LLM_MODEL_MEMORY_EXTRACTOR: str = ""  # memory-extractor (Phase 2 extract/decide)
    LLM_MODEL_MEMORY_MINER: str = ""      # memory-miner (Phase 5 proactive mining)
    LLM_MODEL_MEMORY_AUDITOR: str = ""    # memory-auditor (Phase 7 weekly audit)
    # GitHub Copilot OAuth token storage directory.
    # Leave empty to use the LiteLLM default (~/.config/litellm/github_copilot).
    # In Docker, set this to a path backed by a named volume so tokens survive restarts.
    GITHUB_COPILOT_TOKEN_DIR: str = ""
    # OAuth client credentials — used for Gmail and Microsoft (Outlook/Teams) flows.
    GMAIL_CLIENT_ID: str = ""
    GMAIL_CLIENT_SECRET: str = ""
    MS_CLIENT_ID: str = ""
    MS_CLIENT_SECRET: str = ""
    # MS_TENANT_ID: set to 'common' to allow multi-tenant (personal + work accounts).
    MS_TENANT_ID: str = "common"
    # Google Login OAuth credentials — scope: openid email profile.
    # Separate from GMAIL_CLIENT_ID/SECRET (which uses gmail.readonly scope).
    GOOGLE_AUTH_CLIENT_ID: str = ""
    GOOGLE_AUTH_CLIENT_SECRET: str = ""
    # The redirect URI registered in Google Cloud Console.
    # Google redirects here after consent; this backend route then bounces to
    # the adiuvai:// deep link so the Electron app receives the code.
    # Dev:  http://localhost:8000/api/v1/auth/oauth/google/web-callback
    # Prod: https://api.adiuvai.com/api/v1/auth/oauth/google/web-callback
    OAUTH_REDIRECT_URI: str = "http://localhost:8000/api/v1/auth/oauth/google/web-callback"
    # Gmail Pub/Sub topic for push notifications.
    # Full resource name, e.g. "projects/my-project/topics/gmail-push".
    # Leave empty in dev — setup_watch will skip registration gracefully.
    GMAIL_PUBSUB_TOPIC: str = ""
    # OIDC token audience for Pub/Sub push subscription JWT verification.
    # Set to the service account email or audience string configured in the
    # Pub/Sub push subscription. Leave empty in dev to skip verification
    # (a warning is logged — never silent in production).
    GMAIL_PUBSUB_AUDIENCE: str = ""
    # Fernet key (URL-safe base64, 32-byte key) for at-rest encryption of OAuth
    # tokens stored in cloud_agent_configs.oauth_token_encrypted.
    # Generate with: from cryptography.fernet import Fernet; Fernet.generate_key()
    # NOTE(review): the scout route writes CloudScoutConfig.oauth_token_encrypted —
    # confirm the table name above is still current.
    OAUTH_ENCRYPTION_KEY: str = ""
    # Origins allowed by CORS.
    CORS_ORIGINS: list[str] = [
        "app://.",
        "http://localhost:3000",
        "http://localhost:5173",
        "http://localhost:4173",      # Vite preview (web SPA)
        "https://app.adiuvai.com",    # Production web portal
    ]
    # Langfuse credentials and endpoint.
    LANGFUSE_SECRET_KEY: str = ""
    LANGFUSE_PUBLIC_KEY: str = ""
    LANGFUSE_BASE_URL: str = "https://cloud.langfuse.com"
    SCHEDULER_ENABLED: bool = True
    # Deployment environment; TierManager.get_tier falls back to the 'power'
    # tier when this is "dev" and to 'free' otherwise.
    ENV: Literal["dev", "prod"] = "dev"
    # Values are also read from a UTF-8 ``.env`` file; unknown keys are ignored.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
 settings = Settings()
--- a/services/ws-gateway/app/__init__.py
+++ b/services/ws-gateway/app/__init__.py
--- a/app/core/brief_agent.py
+++ b/app/core/brief_agent.py
@@ -0,0 +1,228 @@
 """Brief agent — produces plain-text home and project status briefs.
 Read-only tool subset only. Never calls _normalize_tagged_list_lines —
 the brief prompt forbids XML tags, so skipping post-processing is intentional.
 """
 from __future__ import annotations
 from collections.abc import AsyncGenerator
 from datetime import date
 from typing import Any
 from app.agents.note_agent import NOTE_READ_TOOLS
 from app.agents.project_agent import PROJECT_READ_TOOLS
 from app.agents.task_agent import TASK_READ_TOOLS
 from app.agents.timeline_agent import TIMELINE_READ_TOOLS
 from app.core.deep_agent import (
    _language_instruction,
    _proactive_hints_injection,
    _read_only_memory_tools,
    _relational_memory_injection,
    _run_single_agent_stream,
    _trace_id_from_context,
    build_brief_multi_project_manifest,
 )
 from app.core.langfuse_client import compile_prompt, get_prompt_or_fallback
 _LANGUAGE_NAMES: dict[str, str] = {
    "en": "English", "it": "Italian", "es": "Spanish",
    "fr": "French", "de": "German",
    "english": "English", "italian": "Italian", "italiano": "Italian",
    "spanish": "Spanish", "español": "Spanish",
    "french": "French", "français": "French",
    "german": "German", "deutsch": "German",
 }
 _HOME_BRIEF_FALLBACK = """\
 You are the user's personal assistant producing a short daily brief.
 ROLE
 Act like a calm, attentive secretary writing a stand-up note for your boss.
 Warm and human, never breezy. Never cheerful filler, never emojis, never
 "here is your brief" meta-text. The user is opening the app mid-workday and
 is probably stressed — your job is to lower cognitive load, not add noise.
 TOOLS — always call before writing
 Pull fresh data every run. Do not invent counts or titles. Use at minimum:
 - list_tasks_due_today — tasks the user owes today
 - list_timelines_today — events starting or ending today
 - list_all_projects — projects currently in progress or at risk
 - memory_list_blocks / memory_get — personal context about people, clients,
  payment habits, working preferences
 If a tool returns nothing, simply omit that topic. Never report zeros.
 WHAT TO INCLUDE
 1. Tasks due today (title + priority; group the 1-2 most important).
 2. Timeline events starting or ending today (and anything that starts/ends
   tomorrow if the user has a very light day).
 3. Active projects that need a nudge — stalled, blocked, or awaiting input.
 4. Memory-aware colour where it sharpens the brief. Examples:
   - "Client Rossi tends to pay late — the Acme invoice is 6 days out."
   - "You usually dislike meetings before 10:00 — the call at 09:30 is unusual."
   Only add a memory line when it changes what the user does. Do not pad.
 WHAT TO OMIT
 - Zero-counts ("no overdue items", "0 meetings today").
 - Statistics ("2 active projects, 3 completed tasks").
 - Headers, titles, greetings, sign-offs, dates, emojis, slang.
 - Meta-phrases ("here is", "let me know if", "hope this helps").
 - XML/HTML tags of any kind. Plain prose only.
 LIGHT-DAY CLAUSE
 If tasks + events + active-project-nudges together produce fewer than two
 sentences of content, also list 1-2 projects in status on_hold or waiting
 and ask a single, specific question about them — e.g. "Is the Bianchi
 redesign still paused, or ready to pick back up?" One question max, grounded
 in a real project name.
 VOICE
 - Calm. Concise. Human. Short sentences.
 - Use **bold** sparingly for task titles, project names, and people's names.
 - No bullet lists. Flow as 2-4 sentences of prose.
 LENGTH
 2-4 sentences total. Hard cap 4. If the day is truly empty, one sentence.
 Respond in the user's language ({language}). Today is {today}.\
 """
 _PROJECT_BRIEF_FALLBACK = """\
 You are the project assistant producing a short status brief for ONE project.
 ROLE
 A senior project manager summarising state-of-play for the owner. Factual,
 sharp, forward-looking. Never reassuring filler, never emojis.
 SCOPE
 Work only with project_id = {project_id}. Do not mention or pull data from
 other projects. Use tools to fetch fresh data:
 - get_project — current status, dates, description
 - list_tasks(project_id) — open work, split by status
 - list_timelines(project_id) — milestones hit, upcoming, overdue
 - list_notes(project_id) — any recent decisions or blockers
 - memory_get — relevant context about the client, collaborators, constraints
 STRUCTURE — follow exactly, one short paragraph per section, no headers
 1. **State.** One sentence: current phase, health (on track / at risk / blocked),
   and why. Cite the concrete signal (overdue milestone, stalled tasks, recent
   blocker note).
 2. **What's moving.** What was completed or progressed recently. Name specific
   tasks or milestones.
 3. **Next steps.** The 1-3 most important things the user should do next, in
   priority order. Be concrete — task name, who owns it, when due if known.
   If waiting on someone else, name them and what the ask is.
 4. **Risks / memory-flagged items.** One line max. Only include when there is
   a real risk or a relevant memory (e.g. late-paying client, tight deadline,
   scope change). Omit the section entirely if nothing to say.
 WHAT TO OMIT
 - Zero-counts ("no overdue tasks").
 - Generic advice ("keep up the good work").
 - Greetings, headers, bullet lists, emojis, sign-offs, meta-phrases.
 - XML/HTML tags or bracketed id lists. Plain prose only.
 VOICE
 - Direct. Factual. No fluff.
 - Use **bold** sparingly for task titles, milestone names, and the owner's name.
 - Short sentences. Prefer verbs over nouns ("Client review is blocking release"
  not "There is a blocker which is the client review").
 LENGTH
 4-8 sentences total across the 3-4 sections. Hard cap 8.
 Respond in the user's language ({language}). Today is {today}.\
 """
 def _resolve_language(context: dict[str, Any]) -> str:
    """Map the user's stored language preference to a display name.
    Reads ``context["core_memory"]["language"]`` (default ``"en"``), trims and
    lower-cases it, then looks it up in ``_LANGUAGE_NAMES``.  Unknown values
    are returned title-cased; an empty result becomes ``"English"``.
    """
    core_memory = context.get("core_memory") or {}
    preference = core_memory.get("language") or "en"
    key = preference.strip().lower()
    display = _LANGUAGE_NAMES.get(key, key.title())
    return display or "English"
 def _build_read_tools(user_id: str, trace_id: str | None) -> list[Any]:
    """Assemble the read-only tool list shared by the brief agents.
    Order: task, project, timeline and note read tools, followed by the
    read-only memory tools built for ``user_id`` / ``trace_id``.
    """
    tools: list[Any] = []
    tools.extend(TASK_READ_TOOLS)
    tools.extend(PROJECT_READ_TOOLS)
    tools.extend(TIMELINE_READ_TOOLS)
    tools.extend(NOTE_READ_TOOLS)
    tools.extend(_read_only_memory_tools(user_id, trace_id))
    return tools
 async def run_home_brief(
    user_id: str,
    context: dict[str, Any],
 ) -> AsyncGenerator[tuple[str, Any], None]:
    """Stream the plain-text daily home brief for ``user_id``.
    Compiles the ``home_brief`` prompt (or its built-in fallback), appends the
    relational-memory, proactive-hint and language injections plus the text
    from ``build_brief_multi_project_manifest()`` when non-empty, then relays
    every (event_type, data) tuple from _run_single_agent_stream unchanged.
    Output is intentionally NOT passed through _normalize_tagged_list_lines.
    """
    from app.agents.folder_agent import FOLDER_TOOLS
    trace_id = _trace_id_from_context(context)
    today = date.today().isoformat()
    language = _resolve_language(context)
    template, langfuse_prompt = get_prompt_or_fallback("home_brief", _HOME_BRIEF_FALLBACK)
    prompt = compile_prompt(template, langfuse_prompt, language=language, today=today)
    for inject in (
        _relational_memory_injection,
        _proactive_hints_injection,
        _language_instruction,
    ):
        prompt += inject(context)
    if today not in prompt:
        # Guarantee the date appears even if the compiled text did not include it.
        prompt += f"\nToday is {today}."
    manifest = await build_brief_multi_project_manifest()
    if manifest:
        prompt += "\n\n" + manifest
    # Home brief gets the shared read tools plus the folder tools.
    tools = list(_build_read_tools(user_id, trace_id))
    tools.extend(FOLDER_TOOLS)
    stream = _run_single_agent_stream(
        user_id=user_id,
        system_prompt=prompt,
        message="Generate the daily brief.",
        context=context,
        langfuse_prompt=langfuse_prompt,
        agent_name="brief-agent",
        tools=tools,
    )
    async for event in stream:
        yield event
 async def run_project_brief(
    user_id: str,
    project_id: str,
    context: dict[str, Any],
 ) -> AsyncGenerator[tuple[str, Any], None]:
    """Stream the plain-text status brief for a single project.
    Compiles the ``project_brief`` prompt (or its built-in fallback) with
    ``project_id``, appends the relational-memory, proactive-hint and language
    injections, then relays every (event_type, data) tuple from
    _run_single_agent_stream unchanged.  Only the shared read tools are used.
    Output is intentionally NOT passed through _normalize_tagged_list_lines.
    """
    trace_id = _trace_id_from_context(context)
    today = date.today().isoformat()
    language = _resolve_language(context)
    template, langfuse_prompt = get_prompt_or_fallback("project_brief", _PROJECT_BRIEF_FALLBACK)
    prompt = compile_prompt(
        template,
        langfuse_prompt,
        language=language,
        today=today,
        project_id=project_id,
    )
    for inject in (
        _relational_memory_injection,
        _proactive_hints_injection,
        _language_instruction,
    ):
        prompt += inject(context)
    if today not in prompt:
        # Guarantee the date appears even if the compiled text did not include it.
        prompt += f"\nToday is {today}."
    stream = _run_single_agent_stream(
        user_id=user_id,
        system_prompt=prompt,
        message=f"Generate the project status brief for project {project_id}.",
        context=context,
        langfuse_prompt=langfuse_prompt,
        agent_name="brief-agent",
        tools=_build_read_tools(user_id, trace_id),
    )
    async for event in stream:
        yield event
--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
--- a/app/core/device_manager.py
+++ b/app/core/device_manager.py
@@ -0,0 +1,151 @@
 """Device connection manager.
 Maintains in-memory state for all active Electron → backend WebSocket
 connections.  One connection per user (latest replaces previous).
 The manager handles the **tool-call round-trip** pattern:
  - Backend sends ``tool_call`` frame → Electron executes the action →
    returns ``tool_result`` frame.
  - ``create_pending_call`` registers a Future keyed by ``call_id``.
  - ``resolve_pending_call`` fulfils the Future; callers awaiting it
    receive the result dict from Electron.
 This pattern is used by all tools (CRUD, file-system, etc.) via
 ``execute_on_client()`` in ``ws_context.py``.
 The ``device_manager`` module-level singleton is imported by both the
 device WS route and the agent runner.
 """
 from __future__ import annotations
 import asyncio
 import json
 import logging
 from dataclasses import dataclass, field
 from fastapi import WebSocket
 logger = logging.getLogger(__name__)
@dataclass
 class DeviceConnection:
    """State for a single connected Electron device.

    ``DeviceConnectionManager`` stores one instance per connected user.
    """
    # WebSocket used to send frames to the device.
    ws: WebSocket
    # Identifier of the device, as supplied at registration time.
    device_id: str
    # Futures indexed by tool_call id — resolved when tool_result arrives.
    pending_calls: dict[str, asyncio.Future[dict]] = field(default_factory=dict)
 class DeviceConnectionManager:
    """Singleton registry of active Electron WebSocket connections.

    Connections are keyed by user id; registering a second connection for
    the same user replaces the first one.

    Thread/task safety note: asyncio is single-threaded by design.  All
    mutations happen inside await-points on the main event loop, so no
    locking is required for the in-memory dicts.
    """
    def __init__(self) -> None:
        # user_id -> the single active connection of that user.
        self._connections: dict[str, DeviceConnection] = {}
    @staticmethod
    def _cancel_pending(conn: DeviceConnection) -> None:
        """Cancel every future of *conn* that has not completed yet."""
        for fut in conn.pending_calls.values():
            if not fut.done():
                fut.cancel()
    # ── Registration ──────────────────────────────────────────────────
    def register(self, user_id: str, device_id: str, ws: WebSocket) -> None:
        """Store the active connection for *user_id*, replacing any previous one.

        Futures still pending on a replaced connection are cancelled, since
        the old socket can no longer deliver their results.
        """
        old = self._connections.get(user_id)
        if old is not None:
            logger.info(
                "device_manager: replacing existing connection for user=%s device=%s",
                user_id,
                old.device_id,
            )
            self._cancel_pending(old)
        self._connections[user_id] = DeviceConnection(ws=ws, device_id=device_id)
        logger.info(
            "device_manager: registered user=%s device=%s", user_id, device_id
        )
    def unregister(self, user_id: str) -> None:
        """Remove the connection for *user_id* and cancel any pending futures.

        No-op when the user has no registered connection.
        """
        conn = self._connections.pop(user_id, None)
        if conn is None:
            return
        self._cancel_pending(conn)
        logger.info("device_manager: unregistered user=%s", user_id)
    # ── Presence queries ──────────────────────────────────────────────
    def get_ws(self, user_id: str) -> WebSocket | None:
        """Return the active WebSocket for *user_id*, or ``None`` if offline."""
        conn = self._connections.get(user_id)
        return conn.ws if conn else None
    def is_online(self, user_id: str, device_id: str | None = None) -> bool:
        """Return ``True`` if the user has an active connection.

        If *device_id* is provided also checks that it matches the connected device.
        """
        conn = self._connections.get(user_id)
        if conn is None:
            return False
        if device_id is not None:
            return conn.device_id == device_id
        return True
    # ── Frame sending ─────────────────────────────────────────────────
    async def send_frame(self, user_id: str, frame: dict) -> None:
        """Send *frame* as a JSON text message to the device.

        Raises ``RuntimeError`` if the user is not connected.
        """
        conn = self._connections.get(user_id)
        if conn is None:
            raise RuntimeError(
                f"send_frame: user {user_id!r} is not connected"
            )
        await conn.ws.send_text(json.dumps(frame))
    # ── Tool-call round-trip ──────────────────────────────────────────
    def create_pending_call(
        self, user_id: str, call_id: str
    ) -> asyncio.Future[dict]:
        """Register a Future that will be resolved when the tool_result arrives.

        Must be called from code running on the event loop: the future is
        bound to the currently running loop so the caller can await it.

        Raises ``RuntimeError`` if the user is not connected, or if no event
        loop is running.
        """
        conn = self._connections.get(user_id)
        if conn is None:
            raise RuntimeError(
                f"create_pending_call: user {user_id!r} is not connected"
            )
        # get_running_loop() instead of the deprecated get_event_loop(): the
        # latter may hand back (or create) a loop that nobody is running,
        # leaving the future impossible to await.
        fut: asyncio.Future[dict] = asyncio.get_running_loop().create_future()
        conn.pending_calls[call_id] = fut
        return fut
    def resolve_pending_call(
        self, user_id: str, call_id: str, result: dict
    ) -> None:
        """Fulfil the Future registered under *call_id* with the Electron result.

        No-ops if the user is not connected or the call_id is unknown
        (already timed out or cancelled).
        """
        conn = self._connections.get(user_id)
        if conn is None:
            return
        # pop() so a duplicate tool_result for the same id is ignored.
        fut = conn.pending_calls.pop(call_id, None)
        if fut is not None and not fut.done():
            fut.set_result(result)
 # Module-level singleton — import this everywhere.
 # Connection state lives only in this instance's memory, so every caller
 # must share the same object to see the same connections.
 device_manager = DeviceConnectionManager()
--- a/app/core/embeddings.py
+++ b/app/core/embeddings.py
@@ -0,0 +1,34 @@
 """OpenAI embedding helper for associative memory tier.
 Single public function: ``embed_text(text) -> list[float] | None``.
 Returns None on any failure — callers must implement a keyword fallback.
 Never raises; all exceptions are logged as warnings.
 """
 from __future__ import annotations
 import logging
 from openai import AsyncOpenAI
 logger = logging.getLogger(__name__)
 # Upper bound on the characters sent to the embeddings endpoint; longer
 # text is truncated by embed_text().
 _MAX_INPUT_CHARS = 8000
 # Embedding model requested by embed_text().
 _EMBEDDING_MODEL = "text-embedding-3-small"
 async def embed_text(text: str) -> list[float] | None:
    """Embed *text* with the OpenAI model named by ``_EMBEDDING_MODEL``.

    The input is truncated to ``_MAX_INPUT_CHARS`` characters before it is
    sent.  This helper is best-effort by design: it never raises, and
    returns ``None`` on empty input or on any failure so the caller can fall
    back to keyword matching.
    """
    if not text:
        # The embeddings endpoint rejects empty input; skip the round-trip.
        return None
    try:
        # Constructed inside the try so a missing API key also yields None.
        client = AsyncOpenAI()
        truncated = text[:_MAX_INPUT_CHARS]
        response = await client.embeddings.create(
            input=truncated,
            model=_EMBEDDING_MODEL,
        )
        result: list[float] = response.data[0].embedding
        logger.debug("embeddings: embed_text dims=%d", len(result))
        return result
    except Exception as exc:
        # Deliberate catch-all: embedding failures must not reach callers.
        logger.warning("embeddings: embed_text failed: %s", exc)
        return None
--- a/app/core/folder_indexer.py
+++ b/app/core/folder_indexer.py
@@ -0,0 +1,183 @@
 """Per-file summarisation for project folder integration."""
 from __future__ import annotations
 import base64
 import io
 from dataclasses import dataclass
 from langchain_core.messages import HumanMessage, SystemMessage
 from pypdf import PdfReader
 from docx import Document as DocxDocument
 from app.core.langfuse_client import (
    compile_prompt,
    extract_usage,
    get_langfuse,
    get_prompt_or_fallback,
 )
 from app.core.llm import get_llm
 # Fallback system prompt for text-file summaries, used when the Langfuse
 # prompt "folder_file_summary_text" is unavailable.  Placeholders use
 # str.format syntax and are filled with ext, name and content.
 _TEXT_FALLBACK = (
    "You are summarising a file for an AI assistant that helps the user manage a project.\n"
    "Produce a single sentence (<=30 words, <=200 chars) that captures the file's purpose "
    "and most important detail.\nFile extension: {ext}\nFile name: {name}\nContent (truncated if long):\n{content}"
 )
 # Fallback system prompt for image summaries, used when the Langfuse prompt
 # "folder_file_summary_image" is unavailable.  It has no placeholders.
 _IMAGE_FALLBACK = (
    "You are summarising an image attached to a project folder.\n"
    "Produce a single sentence (<=30 words, <=200 chars) describing what the image shows "
    "and any obvious purpose (logo, screenshot, diagram, photo of a whiteboard, etc.)."
 )
 # Maximum number of characters of file content passed to the text summariser.
 _MAX_INPUT_CHARS = 6000
@dataclass
 class IndexResult:
    """Outcome of summarising one file."""
    # Summary text produced for the file.
    summary: str
    # Total tokens reported for the LLM call; 0 when no call was made or no
    # usage was reported.
    tokens_used: int
 async def _llm_text(messages: list) -> object:
    """Run the text-summarisation chat call and return the raw response.

    Kept as its own coroutine so tests can patch it at module level instead
    of mocking the LLM object.
    """
    summariser = get_llm(model="gpt-4o-mini", temperature=0.2)
    reply = await summariser.ainvoke(messages)
    return reply
 async def _llm_vision(messages: list) -> object:
    """Run the vision (image) summarisation chat call and return the raw response.

    Mirrors ``_llm_text`` — message list in, response out — so tests can
    patch it at module level in the same way.
    """
    vision_model = get_llm(model="gpt-4o-mini", temperature=0.2)
    reply = await vision_model.ainvoke(messages)
    return reply
 async def summarize_image(*, image_b64: str, mime: str, file_name: str | None = None) -> IndexResult:
    """Summarise an image file with a vision-capable model.

    Parameters
    ----------
    image_b64:
        Base64-encoded image bytes.
    mime:
        MIME type of the image, e.g. ``"image/png"``.
    file_name:
        Optional file name, attached to the Langfuse trace as input metadata.

    Returns
    -------
    IndexResult
        The stripped summary capped at 500 characters, plus the total token
        count reported for the call (0 when none was reported).
    """
    system_text, prompt_obj = get_prompt_or_fallback("folder_file_summary_image", _IMAGE_FALLBACK)
    # The image travels inline as a data URL inside the user message.
    data_url = f"data:{mime};base64,{image_b64}"
    user_parts = [
        {"type": "text", "text": "Summarise this image."},
        {"type": "image_url", "image_url": {"url": data_url}},
    ]
    messages = [
        SystemMessage(content=system_text),
        HumanMessage(content=user_parts),
    ]
    langfuse = get_langfuse()
    if langfuse is None:
        # Observability disabled: plain LLM call.
        response = await _llm_vision(messages)
        usage = extract_usage(response)
    else:
        with langfuse.start_as_current_observation(
            as_type="generation",
            name="folder-summarize-image",
            model="gpt-4o-mini",
            prompt=prompt_obj,
            input={"file_name": file_name, "mime": mime},
        ) as generation:
            response = await _llm_vision(messages)
            usage = extract_usage(response)
            generation.update(output=response.content, usage_details=usage)
    raw_summary = response.content or ""
    return IndexResult(
        summary=raw_summary.strip()[:500],
        tokens_used=usage.get("total", 0),
    )
 async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
    """Summarise the content of a text file.

    Parameters
    ----------
    content:
        Raw text content of the file (will be truncated to _MAX_INPUT_CHARS).
    ext:
        File extension including the leading dot, e.g. ``".md"``.
    name:
        File name, e.g. ``"kickoff.md"``.

    Returns
    -------
    IndexResult
        The stripped summary capped at 500 characters, plus the total token
        count reported for the call (0 when none was reported).
    """
    template, prompt_obj = get_prompt_or_fallback("folder_file_summary_text", _TEXT_FALLBACK)
    excerpt = content[:_MAX_INPUT_CHARS]
    system_text = compile_prompt(template, prompt_obj, ext=ext, name=name, content=excerpt)
    messages = [
        SystemMessage(content=system_text),
        HumanMessage(content="Summarise this file."),
    ]
    langfuse = get_langfuse()
    if langfuse is None:
        # Observability disabled: plain LLM call.
        response = await _llm_text(messages)
        usage = extract_usage(response)
    else:
        # Only the size of the excerpt is traced, not the file content.
        with langfuse.start_as_current_observation(
            as_type="generation",
            name="folder-summarize-text",
            model="gpt-4o-mini",
            prompt=prompt_obj,
            input={"file_name": name, "ext": ext, "content_chars": len(excerpt)},
        ) as generation:
            response = await _llm_text(messages)
            usage = extract_usage(response)
            generation.update(output=response.content, usage_details=usage)
    raw_summary = response.content or ""
    return IndexResult(
        summary=raw_summary.strip()[:500],
        tokens_used=usage.get("total", 0),
    )
 def _extract_pdf_text(pdf_b64: str) -> str:
    """Decode a base64-encoded PDF and return the text of its readable pages.

    Pages whose extraction raises are skipped.  The remaining page texts are
    joined with newlines and the result is stripped.
    """
    reader = PdfReader(io.BytesIO(base64.b64decode(pdf_b64)))
    page_texts: list[str] = []
    for page in reader.pages:
        try:
            extracted = page.extract_text()
        except Exception:
            # Best-effort: one unreadable page must not discard the rest.
            continue
        page_texts.append(extracted or "")
    return "\n".join(page_texts).strip()
 def _extract_docx_text(docx_b64: str) -> str:
    """Decode a base64-encoded DOCX and return its non-empty paragraphs.

    Paragraph texts are joined with newlines and the result is stripped.
    """
    document = DocxDocument(io.BytesIO(base64.b64decode(docx_b64)))
    non_empty = [para.text for para in document.paragraphs if para.text]
    return "\n".join(non_empty).strip()
 async def summarize_pdf(*, pdf_b64: str, name: str) -> IndexResult:
    """Summarise a PDF file from its extracted text.

    Parameters
    ----------
    pdf_b64:
        Base64-encoded PDF bytes.
    name:
        File name, e.g. ``"report.pdf"``.

    Returns
    -------
    IndexResult
        The text summary, or a ``"Could not extract text"`` placeholder with
        zero tokens when no text could be extracted (no LLM call is made).
    """
    extracted = _extract_pdf_text(pdf_b64)
    if extracted:
        return await summarize_text(content=extracted, ext=".pdf", name=name)
    return IndexResult(summary="Could not extract text", tokens_used=0)
 async def summarize_docx(*, docx_b64: str, name: str) -> IndexResult:
    """Summarise a DOCX file from its extracted paragraph text.

    Parameters
    ----------
    docx_b64:
        Base64-encoded DOCX bytes.
    name:
        File name, e.g. ``"spec.docx"``.

    Returns
    -------
    IndexResult
        The text summary, or a ``"Could not extract text"`` placeholder with
        zero tokens when no text could be extracted (no LLM call is made).
    """
    extracted = _extract_docx_text(docx_b64)
    if extracted:
        return await summarize_text(content=extracted, ext=".docx", name=name)
    return IndexResult(summary="Could not extract text", tokens_used=0)
--- a/app/core/langfuse_client.py
+++ b/app/core/langfuse_client.py
@@ -0,0 +1,190 @@
 """Langfuse observability — singleton client and prompt helpers.
 If LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY are not set,
 all helpers are no-ops so the app works without Langfuse configured.
 Usage
 -----
 Tracing::
    from app.core.langfuse_client import get_langfuse
    lf = get_langfuse()
    if lf:
        with lf.start_as_current_observation(as_type="span", name="my-agent") as span:
            span.update(input=user_message)
            # ... do work ...
            span.update(output=result)
        lf.flush()
 Prompt management::
    from app.core.langfuse_client import get_prompt_or_fallback
    text, prompt_obj = get_prompt_or_fallback("home_system", FALLBACK_PROMPT)
    # Use text as the system prompt; pass prompt_obj to generations for linking.
 Linking a prompt to a generation::
    with lf.start_as_current_observation(
        as_type="generation",
        name="llm-call",
        model="gpt-4o",
        prompt=prompt_obj,   # links generation → prompt version in the UI
        input=messages,
    ) as gen:
        response = await llm.ainvoke(messages)
        gen.update(output=response.content, usage_details=extract_usage(response))
 """
 from __future__ import annotations
 import hashlib
 import logging
 from contextlib import contextmanager
 from typing import Any, Generator
 logger = logging.getLogger(__name__)
 # Cached Langfuse client; stays None when Langfuse is not configured or
 # failed to initialise.
 _client: Any = None
 # Flipped on the first get_langfuse() call so initialisation is attempted
 # only once.
 _initialized: bool = False
 def get_langfuse() -> Any | None:
    """Return the Langfuse singleton, or ``None`` when it is unavailable.

    Initialisation is attempted on the first call only.  Its outcome is
    cached, so a missing configuration or a failed construction is not
    retried on later calls.
    """
    global _client, _initialized
    if not _initialized:
        # Mark first, so a failure below is never retried.
        _initialized = True
        from app.config.settings import settings  # local import to avoid circular deps
        configured = bool(settings.LANGFUSE_SECRET_KEY and settings.LANGFUSE_PUBLIC_KEY)
        if not configured:
            logger.debug("langfuse: not configured — observability disabled")
            return None
        try:
            from langfuse import Langfuse
            _client = Langfuse(
                secret_key=settings.LANGFUSE_SECRET_KEY,
                public_key=settings.LANGFUSE_PUBLIC_KEY,
                host=settings.LANGFUSE_BASE_URL,
            )
            logger.info("langfuse: client initialized host=%s", settings.LANGFUSE_BASE_URL)
        except Exception as exc:
            # Observability is optional: log and carry on without a client.
            logger.warning("langfuse: failed to initialize: %s", exc)
            _client = None
    return _client
 def get_prompt_or_fallback(name: str, fallback: str) -> tuple[str, Any]:
    """Fetch a text prompt from Langfuse; fall back to ``fallback`` on any error.

    Returns ``(raw_template, prompt_obj_or_None)``.

    * ``raw_template`` — the uncompiled template string.  Do NOT call ``.format()``
      on it directly; use :func:`compile_prompt` instead so the correct variable
      syntax is applied (``{{var}}`` for Langfuse, ``{var}`` for the fallback).
    * ``prompt_obj`` — the Langfuse prompt object, or ``None`` when Langfuse is
      unavailable / the fetch failed.  Pass this to generation observations so
      Langfuse links the generation to the exact prompt version in the UI.

    Because ``fallback`` is also handed to ``get_prompt``, a failed fetch can
    come back as a prompt object flagged ``is_fallback`` rather than as an
    exception.  That case is reported as ``(fallback, None)`` too, so the
    ``{var}``-style fallback text is never compiled with ``{{var}}`` rules.
    """
    lf = get_langfuse()
    if lf is None:
        return fallback, None
    try:
        prompt = lf.get_prompt(name, label="production", fallback=fallback)
        # ``is True`` (not mere truthiness) so only a genuine boolean flag
        # counts, not an arbitrary attribute value.
        if getattr(prompt, "is_fallback", False) is True:
            logger.debug(
                "langfuse: prompt %r unavailable — using hardcoded fallback", name
            )
            return fallback, None
        # For text-type prompts .prompt holds the raw template string.
        raw = prompt.prompt if hasattr(prompt, "prompt") and isinstance(prompt.prompt, str) else fallback
        return raw, prompt
    except Exception as exc:
        logger.warning("langfuse: get_prompt %r failed: %s — using fallback", name, exc)
        return fallback, None
 def compile_prompt(template: str, prompt_obj: Any, **variables: Any) -> str:
    """Compile *template* with *variables*, choosing the right syntax.

    * When *prompt_obj* is a real Langfuse prompt object, calls
      ``prompt_obj.compile(**variables)`` which handles ``{{variable}}``
      substitution as defined in the Langfuse UI.
    * When *prompt_obj* is ``None`` (Langfuse unavailable or fetch failed),
      or is a Langfuse object flagged ``is_fallback`` (it merely wraps the
      hardcoded fallback text), falls back to ``template.format(**variables)``
      which handles the ``{variable}`` syntax used in the hardcoded fallback
      strings.

    This keeps callers oblivious to which syntax is in use.

    Raises whatever ``str.format`` raises (e.g. ``KeyError``) when the
    ``.format()`` path is taken and *template* references a missing variable.
    """
    # A fallback prompt object carries ``{var}``-style text; compiling it
    # with Langfuse's ``{{var}}`` rules would leave the placeholders unfilled.
    wraps_fallback = getattr(prompt_obj, "is_fallback", False) is True
    if prompt_obj is not None and not wraps_fallback:
        try:
            compiled = prompt_obj.compile(**variables)
            # compile() returns a string for text prompts.
            if isinstance(compiled, str):
                return compiled
            # Chat prompts return a list of dicts — join text parts.
            if isinstance(compiled, list):
                return "\n".join(
                    m.get("content", "") for m in compiled if isinstance(m, dict)
                )
        except Exception as exc:
            logger.warning(
                "langfuse: compile failed for prompt %r: %s — falling back to .format()",
                getattr(prompt_obj, "name", "?"),
                exc,
            )
    return template.format(**variables)
 def extract_usage(response: Any) -> dict[str, int]:
    """Extract token usage from a LangChain AI message into Langfuse format.

    Reads ``response.usage_metadata`` and returns a dict with the keys
    ``input``, ``output`` and ``total``.  Returns ``{}`` when the response
    carries no usage metadata.

    A counter that is missing or ``None`` is treated as 0.  When no total
    is reported it is derived as ``input + output``, so callers that read
    only ``total`` do not under-count.
    """
    meta = getattr(response, "usage_metadata", None)
    if not meta:
        return {}
    # ``or 0`` guards against a provider sending an explicit null counter,
    # which would otherwise make int() raise TypeError.
    input_tokens = int(meta.get("input_tokens") or 0)
    output_tokens = int(meta.get("output_tokens") or 0)
    reported_total = meta.get("total_tokens")
    if reported_total is None:
        total_tokens = input_tokens + output_tokens
    else:
        total_tokens = int(reported_total)
    return {
        "input": input_tokens,
        "output": output_tokens,
        "total": total_tokens,
    }
 def hash_user_id(user_id: str) -> str:
    """Return the hex SHA-256 digest of *user_id* for use as Langfuse ``user_id``.

    Hashing keeps raw database UUIDs out of the external observability
    service while still giving a stable, deterministic identifier for
    per-user metrics in the Langfuse dashboard.
    """
    digest = hashlib.sha256()
    digest.update(user_id.encode("utf-8"))
    return digest.hexdigest()
@contextmanager
 def langfuse_context(
    user_id: str | None = None,
    session_id: str | None = None,
 ) -> Generator[None, None, None]:
    """Propagate ``user_id`` (hashed) and ``session_id`` to all Langfuse observations.
    No-op when Langfuse is not configured or parameters are empty.
    """
    lf = get_langfuse()
    if lf is None or (not user_id and not session_id):
        yield
        return
    try:
        from langfuse import propagate_attributes
    except ImportError:
        logger.debug("langfuse: propagate_attributes not available — skipping context")
        yield
        return
    attrs: dict[str, str] = {}
    if user_id:
        attrs["user_id"] = hash_user_id(user_id)
    if session_id:
        attrs["session_id"] = session_id
    with propagate_attributes(**attrs):
        yield
--- a/app/core/llm.py
+++ b/app/core/llm.py
@@ -0,0 +1,156 @@
 """LLM factory — centralised model instantiation via LiteLLM.
 Every agent and the orchestrator call ``get_llm()``
 instead of directly constructing a provider-specific class.  The model string
 follows the `LiteLLM model naming convention
 <https://docs.litellm.ai/docs/providers>`_:
 * OpenAI:     ``gpt-4o``, ``gpt-4o-mini``
 * Anthropic:  ``anthropic/claude-3.5-sonnet``
 * Google:     ``gemini/gemini-pro``
 * Ollama:     ``ollama/llama3``
 * Bedrock:    ``bedrock/anthropic.claude-v2``
 Switch providers by changing **LLM_MODEL** in ``.env``
 — no code changes required.
 """
 from __future__ import annotations
 import os
 import warnings
 from collections.abc import Callable
 from openai import AsyncOpenAI
 import litellm
 from langchain_openai import ChatOpenAI
 from langchain_litellm import ChatLiteLLM
 from litellm import get_supported_openai_params  # noqa: F401 – validates install
 from app.config.settings import settings
 # Some models (e.g. gpt-5, o-series) reject unsupported params like temperature.
 # Drop them silently instead of raising UnsupportedParamsError.
 litellm.drop_params = True
 # Some provider responses include a plain dict in the `usage` field where a
 # richer Pydantic model is expected. This warning is noisy but non-fatal.
 warnings.filterwarnings(
    "ignore",
    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
    category=UserWarning,
 )
 def _api_key_for_model(model: str) -> str | None:
    """Return the most appropriate API key for the given LiteLLM model string."""
    if model.startswith("anthropic/"):
        return settings.ANTHROPIC_API_KEY or None
    if model.startswith("gemini/") or model.startswith("google/"):
        return settings.GOOGLE_API_KEY or None
    if model.startswith("cerebras/"):
        return settings.CEREBRAS_API_KEY or None
    if model.startswith("groq/"):
        return settings.GROQ_API_KEY or None
    if model.startswith("deepseek/"):
        return settings.DEEPSEEK_API_KEY or None
    if model.startswith("github_copilot/"):
        # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
        # No API key is required; returning None lets LiteLLM handle auth.
        return None
    # Default: OpenAI-compatible (covers plain model names like "gpt-4o")
    return settings.OPENAI_API_KEY or None
 def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
 ) -> ChatOpenAI | ChatLiteLLM:
    """Return a LangChain chat model for *model*.

    Provider-prefixed identifiers (anything containing ``/``, e.g.
    ``anthropic/…``, ``github_copilot/…``) get a ``ChatLiteLLM`` so LiteLLM
    handles routing and auth.  Plain names such as ``gpt-4o`` get a
    ``ChatOpenAI`` configured with the API key from settings.

    Parameters
    ----------
    model:
        LiteLLM model identifier. Defaults to ``settings.LLM_MODEL``.
    temperature:
        Sampling temperature.  Defaults to ``0``.
    """
    resolved = model or settings.LLM_MODEL
    # Point LiteLLM to the custom token directory when configured; a value
    # already present in the environment is left untouched.
    if settings.GITHUB_COPILOT_TOKEN_DIR:
        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
    if "/" not in resolved:
        return ChatOpenAI(
            model=resolved,
            temperature=temperature,
            api_key=_api_key_for_model(resolved),
        )
    return ChatLiteLLM(model=resolved, temperature=temperature)
 # Agent name -> zero-argument resolver returning that agent's model string.
 # Resolvers are lambdas so settings are read at call time, not at import.
 # Most agents fall back to settings.LLM_MODEL when their override is empty;
 # the memory extractor/miner fall back to "gpt-4o-mini", and the note
 # summarizer always uses "gpt-4o-mini".
 _AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    "classifier":          lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
    "home-agent":          lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
    "unified-processor":   lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
    "cloud-processor":     lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
    "brief-agent":         lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,
    "task-brief-agent":    lambda: settings.LLM_MODEL_TASK_BRIEF_AGENT or settings.LLM_MODEL,
    "setup":               lambda: settings.LLM_MODEL_SETUP_AGENT or settings.LLM_MODEL,
    "memory-extractor":    lambda: settings.LLM_MODEL_MEMORY_EXTRACTOR or "gpt-4o-mini",
    "memory-miner":        lambda: settings.LLM_MODEL_MEMORY_MINER or "gpt-4o-mini",
    "memory-auditor":      lambda: settings.LLM_MODEL_MEMORY_AUDITOR or settings.LLM_MODEL,
    "note-summarizer":     lambda: "gpt-4o-mini",
 }
 def model_for_agent(agent_name: str) -> str:
    """Return the resolved model string for *agent_name* (for Langfuse tracking).

    Agent names without an entry in ``_AGENT_MODEL_SETTINGS`` resolve to
    ``settings.LLM_MODEL``.
    """
    resolver = _AGENT_MODEL_SETTINGS.get(agent_name)
    if resolver is None:
        return settings.LLM_MODEL
    return resolver()
 def get_agent_llm(
    agent_name: str,
    *,
    temperature: float = 0,
 ) -> ChatOpenAI | ChatLiteLLM:
    """Return the chat model configured for *agent_name*.

    The model string comes from ``model_for_agent``, so per-agent overrides
    are respected and unknown agent names (or overrides left empty in
    ``.env``) fall back to the defaults defined there.
    """
    return get_llm(model=model_for_agent(agent_name), temperature=temperature)
 async def embed(text: str) -> list[float]:
    """Return an embedding vector for *text*.

    The model is taken from ``settings.LLM_EMBED_MODEL``, so switching the
    provider in ``.env`` (e.g. ``github_copilot/text-embedding-3-small``)
    needs no code changes.  Provider-prefixed names (containing ``/``) go
    through LiteLLM; plain OpenAI model names use the raw ``AsyncOpenAI``
    client.
    """
    embed_model = settings.LLM_EMBED_MODEL
    if "/" not in embed_model:
        # Plain OpenAI model name — use the raw AsyncOpenAI client.
        client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
        openai_response = await client.embeddings.create(model=embed_model, input=text)
        return openai_response.data[0].embedding
    # Provider-prefixed model (Copilot, Bedrock, etc.): LiteLLM applies the
    # provider's own auth mechanism.
    litellm_response = await litellm.aembedding(model=embed_model, input=[text])
    return litellm_response.data[0]["embedding"]
--- a/app/core/memory_extraction.py
+++ b/app/core/memory_extraction.py
@@ -0,0 +1,450 @@
 """Mem0-style Extract/Update pipeline — Phase 2.
 Runs after every ``store_episode`` call to distil durable facts, preferences,
 routines, and relations from the latest conversation turn.
 Entry point: ``run_extraction(db, user_id, last_user_msg, last_assistant_msg, session_id)``
 Design notes
 ------------
 - LLM calls per turn: one to extract candidates, then at most one decide call per candidate.
 - Short-circuit: if no existing neighbours → ADD without a second LLM call (cost saving).
 - Zero-trust: never logs decrypted user content; relation subject/object labels are
  treated as identifiers (safe to log per spec).
 - Must not raise into the request path — caller wraps in asyncio.create_task().
 """
 from __future__ import annotations
 import json
 import logging
 from typing import Any, Literal
 from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.langfuse_client import get_langfuse, get_prompt_or_fallback, extract_usage, langfuse_context
 from app.core.llm import get_agent_llm, model_for_agent
 logger = logging.getLogger(__name__)
 # ── Fallback prompts (used when Langfuse unavailable) ─────────────────────────
 # System prompt for candidate extraction.  extract_candidates() renders its
 # template with ``str.format`` (fields: last_turn, core_memory, recent_episodes),
 # so the doubled braces around the JSON schema example are escapes that come out
 # as single literal braces.
 _EXTRACTION_FALLBACK = (
    "You are a memory extractor for a personal AI secretary. Given the last conversation "
    "turn, the user's core memory, and recent episode summaries, identify durable facts, "
    "preferences, routines, and person/project relations worth remembering.\n\n"
    "Output JSON matching this schema exactly:\n"
    '{{"candidates": [{{"type": "<fact|preference|relation|routine>", '
    '"content": "<short canonical statement>", '
    '"target_tier": "<core|associative|relational|proactive>", '
    '"subject": null, "predicate": null, "object": null, "confidence": 0.7}}]}}\n\n'
    "Rules:\n"
    "- Skip small talk, greetings, one-off questions.\n"
    "- Max 5 candidates per call.\n"
    "- Only extract durable information (still true next week).\n"
    "- For type=relation: subject/predicate/object required.\n"
    "- Default confidence=0.7.\n\n"
    "## Last turn\n{last_turn}\n\n"
    "## Core memory (current)\n{core_memory}\n\n"
    "## Recent episodes\n{recent_episodes}"
 )
 # System prompt for the per-candidate decision step.  decide_action() renders
 # its template with ``str.format`` (fields: candidate, existing_memories) and
 # accepts only the four verbs listed here as a reply.
 _DECIDE_FALLBACK = (
    "You are a memory update decision engine. Given a new memory candidate and a list of "
    "existing memories from the same tier, decide what action to take.\n\n"
    "Respond with exactly one word: ADD, UPDATE, DELETE, or NOOP.\n\n"
    "- ADD: new information not in existing memories.\n"
    "- UPDATE: contradicts or supersedes an existing memory.\n"
    "- DELETE: states something is no longer true.\n"
    "- NOOP: already captured accurately.\n\n"
    "## New candidate\n{candidate}\n\n"
    "## Existing memories (same tier, top neighbours)\n{existing_memories}"
 )
 # ── Pydantic schemas ───────────────────────────────────────────────────────────
 class MemoryCandidate(BaseModel):
    """One memory item proposed by the extraction LLM.

    ``type`` and ``target_tier`` are restricted to the literals below, and
    ``confidence`` is validated to lie in [0.0, 1.0].
    """

    # Kind of information the candidate carries.
    type: Literal["fact", "preference", "relation", "routine"]
    # Short statement to store.
    content: str
    # Memory tier the candidate should be written to.
    target_tier: Literal["core", "associative", "relational", "proactive"]
    # Relation triple.  The extraction prompt requires these for type=relation;
    # the schema itself does not enforce that, so all three may be None.
    subject: str | None = None
    predicate: str | None = None
    object: str | None = None
    # Defaults to 0.7, matching the default stated in the extraction prompt.
    confidence: float = Field(default=0.7, ge=0.0, le=1.0)
 class ExtractionResult(BaseModel):
    """Top-level JSON object returned by the extraction LLM call."""

    # Empty when nothing was extracted; extract_candidates() also returns an
    # empty list when the call or its parsing fails.
    candidates: list[MemoryCandidate] = Field(default_factory=list)
 # ── Task 2.1 — Extract candidates ─────────────────────────────────────────────
 async def extract_candidates(
    last_turn: str,
    core_memory: dict[str, str],
    recent_episodes: list[str],
 ) -> ExtractionResult:
    """Ask the ``memory-extractor`` LLM for memory candidates in the latest turn.

    Args:
        last_turn: Text of the latest exchange to mine for durable information.
        core_memory: Current core-memory blocks; rendered as ``key: value`` lines.
        recent_episodes: Episode summaries; only the last five are included.

    Returns:
        The validated ``ExtractionResult``.  Every failure (prompt rendering, LLM
        construction, the call itself, JSON parsing, schema validation) is logged
        and turned into an empty result, so this coroutine never raises.
    """
    # Everything, including prompt rendering and LLM construction, sits inside the
    # guard so the documented "never raises" contract actually holds.
    try:
        from langchain_core.messages import HumanMessage, SystemMessage  # noqa: PLC0415

        prompt_vars = {
            "last_turn": last_turn,
            "core_memory": "\n".join(f"{k}: {v}" for k, v in core_memory.items()) or "(empty)",
            "recent_episodes": "\n---\n".join(recent_episodes[-5:]) or "(none)",
        }
        template, prompt_obj = get_prompt_or_fallback("memory_extraction", _EXTRACTION_FALLBACK)
        # Compile with Langfuse variable syntax ({{var}}) or fallback {var}
        if prompt_obj is not None:
            try:
                system_text = prompt_obj.compile(**prompt_vars)
                if isinstance(system_text, list):
                    # A list result is flattened by joining the "content" of its dict items.
                    system_text = "\n".join(
                        m.get("content", "") for m in system_text if isinstance(m, dict)
                    )
            except Exception as exc:
                logger.warning("memory_extraction: compile failed: %s", exc)
                system_text = template.format(**prompt_vars)
        else:
            system_text = template.format(**prompt_vars)

        llm = get_agent_llm("memory-extractor", temperature=0)
        # Bind JSON mode so the model always returns parseable output.
        llm_json = llm.bind(response_format={"type": "json_object"})  # type: ignore[attr-defined]
        lf = get_langfuse()
        messages = [
            SystemMessage(content=system_text),
            HumanMessage(content="Extract memory candidates as JSON."),
        ]
        if lf:
            with lf.start_as_current_observation(
                as_type="generation",
                name="memory-extraction",
                model=model_for_agent("memory-extractor"),
                prompt=prompt_obj,
                input=messages,
            ) as gen:
                response = await llm_json.ainvoke(messages)
                gen.update(output=response.content, usage=extract_usage(response))
        else:
            response = await llm_json.ainvoke(messages)
        raw = json.loads(response.content)
        result = ExtractionResult.model_validate(raw)
        logger.info("memory_extraction: extracted %d candidates", len(result.candidates))
        return result
    except Exception as exc:
        logger.warning("memory_extraction: extract_candidates failed: %s", exc)
        return ExtractionResult(candidates=[])
 # ── Task 2.2 — Decide action ──────────────────────────────────────────────────
 async def decide_action(
    candidate: MemoryCandidate,
    existing: list[str],
 ) -> Literal["ADD", "UPDATE", "DELETE", "NOOP"]:
    """Decide what to do with *candidate* given existing memories of the same tier.

    Args:
        candidate: The proposed memory item.
        existing: Neighbouring memories from the same tier.  When empty, the
            function returns ``"ADD"`` immediately without an LLM call.

    Returns:
        One of ``"ADD"``, ``"UPDATE"``, ``"DELETE"``, ``"NOOP"``.  An unrecognised
        reply or any failure (prompt rendering, LLM construction, the call) is
        logged and answered with ``"ADD"``, so this coroutine never raises.
    """
    if not existing:
        return "ADD"
    # Prompt rendering and LLM construction are inside the guard as well, so the
    # documented "never raises" contract holds for them too.
    try:
        from langchain_core.messages import HumanMessage, SystemMessage  # noqa: PLC0415

        candidate_str = f"[{candidate.type}] {candidate.content}"
        existing_str = "\n".join(f"- {m}" for m in existing)
        template, prompt_obj = get_prompt_or_fallback("memory_decide_action", _DECIDE_FALLBACK)
        if prompt_obj is not None:
            try:
                system_text = prompt_obj.compile(
                    candidate=candidate_str,
                    existing_memories=existing_str,
                )
                if isinstance(system_text, list):
                    # A list result is flattened by joining the "content" of its dict items.
                    system_text = "\n".join(
                        m.get("content", "") for m in system_text if isinstance(m, dict)
                    )
            except Exception as exc:
                logger.warning("memory_extraction: decide compile failed: %s", exc)
                system_text = template.format(candidate=candidate_str, existing_memories=existing_str)
        else:
            system_text = template.format(candidate=candidate_str, existing_memories=existing_str)

        llm = get_agent_llm("memory-extractor", temperature=0)
        lf = get_langfuse()
        messages = [
            SystemMessage(content=system_text),
            HumanMessage(content="Decide action."),
        ]
        if lf:
            with lf.start_as_current_observation(
                as_type="generation",
                name="memory-decide-action",
                model=model_for_agent("memory-extractor"),
                prompt=prompt_obj,
                input=messages,
            ) as gen:
                response = await llm.ainvoke(messages)
                gen.update(output=response.content, usage=extract_usage(response))
        else:
            response = await llm.ainvoke(messages)
        # Tolerate decoration around the verb ("ADD.", "**UPDATE**", "`NOOP`") so a
        # correct decision is not silently downgraded to the ADD default.
        verb = response.content.strip().strip(" \t\r\n.,:;!*`'\"").upper()
        if verb in ("ADD", "UPDATE", "DELETE", "NOOP"):
            return verb  # type: ignore[return-value]
        logger.warning("memory_extraction: unexpected decide verb=%r, defaulting ADD", verb)
        return "ADD"
    except Exception as exc:
        logger.warning("memory_extraction: decide_action failed: %s", exc)
        return "ADD"
 # ── Task 2.3 — Pipeline orchestrator ──────────────────────────────────────────
 async def run_extraction(
    db: AsyncSession,
    user_id: str,
    last_user_msg: str,
    last_assistant_msg: str,
    session_id: str | None,
 ) -> None:
    """Run the Mem0-style extract/update pipeline for a single conversation turn.

    The work is delegated to ``_run_extraction_inner``, which:
    1. loads core memory and recent episodes,
    2. extracts memory candidates from the turn,
    3. decides and applies an action for each candidate,
    4. traces the run via Langfuse when it is available.

    Any exception is logged as a warning and suppressed, so this never raises.
    """
    try:
        await _run_extraction_inner(
            db=db,
            user_id=user_id,
            last_user_msg=last_user_msg,
            last_assistant_msg=last_assistant_msg,
            session_id=session_id,
        )
    except Exception as failure:
        logger.warning("memory_extraction: run_extraction failed user=%s: %s", user_id, failure)
 async def _run_extraction_inner(
    db: AsyncSession,
    user_id: str,
    last_user_msg: str,
    last_assistant_msg: str,
    session_id: str | None,
 ) -> None:
    """Load context, extract candidates and apply them; may raise.

    Skips the run (with a warning) when no Fernet key is available for the user.
    A failure while applying one candidate is logged and does not stop the
    remaining candidates.  When Langfuse is available the run is wrapped in a
    ``memory-extraction-pipeline`` span and the client is flushed afterwards.
    """
    from app.core.memory_middleware import MemoryMiddleware  # noqa: PLC0415
    middleware = MemoryMiddleware(db)
    fernet = await middleware._get_fernet(user_id)
    if fernet is None:
        logger.warning("memory_extraction: no fernet for user=%s, skipping", user_id)
        return
    # 1. Load context
    core: dict[str, str] = await middleware._load_core(user_id, fernet)
    episodes: list[str] = await middleware._load_episodic(user_id, fernet, session_id=session_id)
    last_turn = f"User: {last_user_msg}\nAssistant: {last_assistant_msg}"
    lf = get_langfuse()

    async def _run(trace_id: str | None) -> dict[str, Any]:
        """Extract and apply candidates; return a summary dict for the trace."""
        # 2. Extract candidates
        result = await extract_candidates(last_turn, core, episodes)
        if not result.candidates:
            logger.info("memory_extraction: no candidates user=%s", user_id)
            return {"candidates": 0, "applied": 0}
        logger.info(
            "memory_extraction: processing %d candidates user=%s trace=%s",
            len(result.candidates),
            user_id,
            trace_id or "-",
        )
        # 3. Apply each candidate
        applied = 0  # counts candidates processed without error, NOOP decisions included
        actions: list[str] = []
        for candidate in result.candidates:
            try:
                await _apply_candidate(middleware, db, user_id, fernet, candidate, trace_id)
                applied += 1
                actions.append(f"{candidate.type}:{candidate.target_tier}")
            except Exception as exc:
                # Zero-trust: identify the candidate by type/tier only.  Its content
                # is derived from the user's conversation and must not reach the logs.
                logger.warning(
                    "memory_extraction: apply failed type=%s tier=%s user=%s: %s",
                    candidate.type,
                    candidate.target_tier,
                    user_id,
                    exc,
                )
        logger.info(
            "memory_extraction: applied %d/%d candidates user=%s",
            applied,
            len(result.candidates),
            user_id,
        )
        return {"candidates": len(result.candidates), "applied": applied, "actions": actions}

    with langfuse_context(user_id=user_id, session_id=session_id):
        if lf:
            with lf.start_as_current_observation(
                as_type="span",
                name="memory-extraction-pipeline",
                input={"last_turn_preview": last_turn[:200]},
            ) as span:
                summary = await _run(trace_id=span.id)
                span.update(output=summary)
            try:
                lf.flush()
            except Exception as exc:
                # Tracing is best-effort; record the failure instead of hiding it.
                logger.debug("memory_extraction: langfuse flush failed: %s", exc)
        else:
            await _run(trace_id=None)
 async def _apply_candidate(
    middleware: Any,
    db: AsyncSession,
    user_id: str,
    fernet: Any,
    candidate: MemoryCandidate,
    trace_id: str | None,
 ) -> None:
    """Fetch neighbours, decide the action, and apply it to the candidate's tier.

    Tier handling:
    - core: ADD/UPDATE write a block keyed by ``_content_to_key(content)``;
      DELETE removes the block with that derived key.
    - associative: ADD/UPDATE store the content; DELETE is not supported.
    - proactive: ADD/UPDATE store an encrypted pattern row; DELETE is not supported.
    - relational: upserted whenever subject, predicate and object are all set.

    Candidates that cannot be applied are logged (without their content) and skipped.
    """
    tier = candidate.target_tier
    neighbours: list[str] = []
    if tier == "core":
        # The first three core blocks serve as context; no similarity ranking is done here.
        blocks = await middleware.list_core_blocks(user_id)
        neighbours = [b["value"] for b in blocks[:3]]
    elif tier == "associative":
        neighbours = await middleware.search_archival(user_id, candidate.content, top_k=3)
    elif tier == "proactive":
        neighbours = await middleware.search_recall(user_id, candidate.content, top_k=3)
    # relational: no neighbour lookup, so decide_action() short-circuits to ADD.
    action = await decide_action(candidate, neighbours)
    logger.info(
        "memory_extraction: candidate type=%s tier=%s action=%s",
        candidate.type,
        tier,
        action,
    )
    if action == "NOOP":
        return
    if tier == "relational":
        if candidate.subject and candidate.predicate and candidate.object:
            await _upsert_relation(
                middleware, db, user_id, candidate, trace_id
            )
        else:
            # Previously dropped silently; make the skip visible.
            logger.info(
                "memory_extraction: relation candidate missing subject/predicate/object, skipped"
            )
        return
    if action in ("ADD", "UPDATE"):
        if tier == "core":
            # Derive a short key from the content (first 40 chars, snake_cased).
            key = _content_to_key(candidate.content)
            await middleware.update_core(user_id, key, candidate.content, trace_id=trace_id)
        elif tier == "associative":
            await middleware.store_associative(user_id, candidate.content)
        elif tier == "proactive":
            await _store_proactive_stub(middleware, db, user_id, candidate, fernet)
    elif action == "DELETE":
        if tier == "core":
            # NOTE(review): the key is derived from the *candidate* text, so it only
            # matches a stored block whose content produced the same key — confirm.
            key = _content_to_key(candidate.content)
            await middleware.delete_core(user_id, key)
        else:
            # Previously ignored silently; make the skip visible.
            logger.info("memory_extraction: DELETE not supported for tier=%s, skipped", tier)
 def _content_to_key(content: str) -> str:
    """Derive a short snake_case key from a content string (first 40 chars)."""
    import re  # noqa: PLC0415
    slug = re.sub(r"[^a-z0-9]+", "_", content[:40].lower()).strip("_")
    return slug or "memory"
 async def _upsert_relation(
    middleware: Any,
    db: AsyncSession,
    user_id: str,
    candidate: MemoryCandidate,
    trace_id: str | None,
 ) -> None:
    """Write the candidate's relation triple through ``middleware.upsert_relation``.

    Missing triple parts fall back to ``"unknown"`` (subject/object) and
    ``"related_to"`` (predicate); both entity types are always ``"unknown"``.
    ``db`` and ``trace_id`` are accepted but not used here.
    """
    subject_label = candidate.subject or "unknown"
    predicate_label = candidate.predicate or "related_to"
    object_label = candidate.object or "unknown"
    await middleware.upsert_relation(
        user_id=user_id,
        subject=subject_label,
        subject_type="unknown",
        predicate=predicate_label,
        object_=object_label,
        object_type="unknown",
        confidence=candidate.confidence,
    )
    # The log carries the raw candidate values, not the fallbacks sent above.
    logger.info(
        "memory_extraction: upserted relation subject=%s predicate=%s object=%s",
        candidate.subject,
        candidate.predicate,
        candidate.object,
    )
 async def _store_proactive_stub(
    middleware: Any,
    db: AsyncSession,
    user_id: str,
    candidate: MemoryCandidate,
    fernet: Any,
 ) -> None:
    """Insert the candidate as an encrypted ``MemoryProactive`` row and commit.

    The content is encrypted with the user's Fernet key before it is stored.
    A failed commit is logged and rolled back rather than raised.
    ``middleware`` is accepted but not used here.
    """
    import uuid  # noqa: PLC0415
    from app.models import MemoryProactive  # noqa: PLC0415
    from app.core.memory_middleware import _encrypt  # noqa: PLC0415

    ciphertext = _encrypt(fernet, candidate.content)
    db.add(
        MemoryProactive(
            id=str(uuid.uuid4()),
            user_id=user_id,
            pattern_encrypted=ciphertext,
            confidence=candidate.confidence,
            source="inferred",
        )
    )
    try:
        await db.commit()
    except Exception as failure:
        logger.warning("memory_extraction: store proactive failed: %s", failure)
        await db.rollback()
    else:
        logger.info("memory_extraction: stored proactive pattern user=%s", user_id)
--- a/app/core/memory_maintenance.py
+++ b/app/core/memory_maintenance.py
@@ -0,0 +1,581 @@
 """Memory maintenance jobs — Phase 3/5.
 Three entrypoints called by the scheduler (APScheduler) registered in app/main.py:
  drain_extraction_queue(db) — Free-tier batch extraction (Phase 2/5).
  mine_proactive_patterns(db, user_id) — Power+ pattern mining (Phase 5).
  decay_relations(db, user_id) — confidence decay + pruning for memory_relations (Phase 3).
 A fourth entrypoint, audit_memory(db, user_id), runs the weekly memory audit (Phase 7).
 All are safe to call manually or from tests; they never raise.
 """
 from __future__ import annotations
 import json
 import logging
 import uuid
 from datetime import datetime, timedelta, timezone
 from cryptography.fernet import Fernet
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
 from app.models import MemoryAssociative, MemoryEpisodic, MemoryProactive, MemoryRelation, User
 logger = logging.getLogger(__name__)
 # Decay parameters for relations: confidence is multiplied by _DECAY_FACTOR once
 # per full _DECAY_PERIOD_DAYS elapsed, and rows whose decayed confidence drops
 # below _PRUNE_THRESHOLD are deleted.
 _DECAY_FACTOR = 0.95
 _DECAY_PERIOD_DAYS = 30
 _PRUNE_THRESHOLD = 0.2
 # Proactive pattern decay: 10 % per 7 days since last sighting
 # (the decay code measures the age from the row's created_at).
 _PROACTIVE_DECAY_FACTOR = 0.9
 _PROACTIVE_DECAY_PERIOD_DAYS = 7
 _PROACTIVE_PRUNE_THRESHOLD = 0.2
 # Mining: require at least this many episodes to attempt pattern extraction;
 # only episodes created within the lookback window are considered.
 _MIN_EPISODES_FOR_MINING = 3
 _MINING_LOOKBACK_DAYS = 30
 # Audit: caps to control token cost
 # _AUDIT_MAX_FACTS limits the associative rows sent to the contradiction scan.
 # _AUDIT_MAX_LABELS presumably caps labels for canonicalization — TODO confirm.
 _AUDIT_MAX_FACTS = 50
 _AUDIT_MAX_LABELS = 100
 async def decay_relations(db: AsyncSession, user_id: str) -> None:
    """Decay the confidence of every relation row belonging to *user_id*.

    For each full 30-day period since ``last_confirmed_at`` the confidence is
    multiplied by 0.95; rows that end up below 0.2 are deleted.

    Any exception is logged as a warning and suppressed, so this never raises.
    """
    try:
        await _decay_relations_inner(db=db, user_id=user_id)
    except Exception as failure:
        logger.warning("memory_maintenance: decay_relations failed user=%s: %s", user_id, failure)
 async def _decay_relations_inner(db: AsyncSession, user_id: str) -> None:
    """Apply relation decay and pruning for one user, then commit once.

    The age of a row is measured from ``last_confirmed_at`` (or ``created_at``
    when that is unset); naive timestamps are treated as UTC.  A failed commit
    is logged and rolled back.

    NOTE(review): the decayed value is written back to ``confidence`` while the
    reference timestamp is left untouched, so each later run multiplies the
    already-decayed value by the full factor again — confirm this is intended.
    """
    query = select(MemoryRelation).where(MemoryRelation.user_id == user_id)
    relations = (await db.execute(query)).scalars().all()
    now = datetime.now(timezone.utc)
    pruned_count = 0
    decayed_count = 0
    for relation in relations:
        anchor = relation.last_confirmed_at or relation.created_at
        if anchor is None:
            continue
        if anchor.tzinfo is None:
            anchor = anchor.replace(tzinfo=timezone.utc)
        age_days = (now - anchor).days
        if age_days < _DECAY_PERIOD_DAYS:
            # Less than one full period old: nothing to decay yet.
            continue
        full_periods = age_days // _DECAY_PERIOD_DAYS
        new_confidence = relation.confidence * (_DECAY_FACTOR ** full_periods)
        if new_confidence < _PRUNE_THRESHOLD:
            await db.delete(relation)
            pruned_count += 1
            logger.info(
                "memory_maintenance: pruned relation id=%s user=%s subject=%s predicate=%s "
                "confidence=%.3f (below threshold)",
                relation.id, user_id, relation.subject_label, relation.predicate, new_confidence,
            )
        else:
            relation.confidence = new_confidence
            decayed_count += 1
    try:
        await db.commit()
        logger.info(
            "memory_maintenance: decay_relations user=%s decayed=%d deleted=%d",
            user_id, decayed_count, pruned_count,
        )
    except Exception as failure:
        logger.warning("memory_maintenance: decay_relations commit failed user=%s: %s", user_id, failure)
        await db.rollback()
 async def drain_extraction_queue(db: AsyncSession) -> None:
    """Drain the pending ``ExtractionQueue`` rows (Free-tier batch extraction).

    Every queued row triggers one run of the Mem0-style extraction pipeline and
    is deleted once that run returns.

    Any exception is logged as a warning and suppressed, so this never raises.
    """
    try:
        await _drain_extraction_queue_inner(db=db)
    except Exception as failure:
        logger.warning("memory_maintenance: drain_extraction_queue failed: %s", failure)
 async def _drain_extraction_queue_inner(db: AsyncSession) -> None:
    """Run the extraction pipeline once per queued row, committing row by row.

    A failure on one row is logged and rolled back; the remaining rows are
    still attempted.

    NOTE(review): ``run_extraction`` is called with empty user/assistant
    messages and no session id, so only the row's ``user_id`` reaches the
    pipeline — confirm this is intended.
    NOTE(review): ``run_extraction`` suppresses its own errors, so a row is
    deleted even when the extraction failed internally.
    """
    from app.models import ExtractionQueue  # noqa: PLC0415

    pending = (await db.execute(select(ExtractionQueue))).scalars().all()
    if not pending:
        logger.debug("memory_maintenance: drain_extraction_queue nothing to drain")
        return
    total = len(pending)
    logger.info("memory_maintenance: drain_extraction_queue pending=%d", total)

    from app.core.memory_extraction import run_extraction  # noqa: PLC0415

    done = 0
    for queued in pending:
        try:
            await run_extraction(
                db=db,
                user_id=queued.user_id,
                last_user_msg="",
                last_assistant_msg="",
                session_id=None,
            )
            await db.delete(queued)
            await db.commit()
            done += 1
        except Exception as failure:
            logger.warning(
                "memory_maintenance: drain failed row=%s user=%s: %s",
                queued.id, queued.user_id, failure,
            )
            await db.rollback()
    logger.info("memory_maintenance: drain_extraction_queue processed=%d/%d", done, total)
 async def mine_proactive_patterns(db: AsyncSession, user_id: str) -> None:
    """Mine recurring behavioural patterns from the user's recent episodes (Power+ only).

    The inner routine:
    1. checks the ``proactive_mining`` tier feature,
    2. loads and decrypts the last 30 days of episodic summaries,
    3. asks the ``memory-miner`` LLM for recurring patterns,
    4. encrypts and stores each pattern in memory_proactive,
    5. decays existing proactive rows once the new patterns are committed.

    Any exception is logged as a warning and suppressed, so this never raises.
    """
    try:
        await _mine_proactive_patterns_inner(db=db, user_id=user_id)
    except Exception as failure:
        logger.warning("memory_maintenance: mine_proactive_patterns failed user=%s: %s", user_id, failure)
 async def _mine_proactive_patterns_inner(db: AsyncSession, user_id: str) -> None:
    """Mine, store and then decay proactive patterns for one user; may raise.

    Returns early (without running the decay step) when the tier lacks the
    ``proactive_mining`` feature, the user has no encryption key, fewer than
    ``_MIN_EPISODES_FOR_MINING`` episodes fall inside the lookback window, no
    episode could be decrypted, no pattern was extracted, or the commit fails.

    Episodes that cannot be decrypted are skipped; their count (never their
    content) is logged so key or data corruption does not go unnoticed.
    """
    from app.billing.tier_manager import tier_manager  # noqa: PLC0415
    tier = await tier_manager.get_tier(user_id, db)
    if not tier_manager.check_feature(tier, "proactive_mining"):
        logger.debug("memory_maintenance: mine_proactive_patterns skipped (tier=%s)", tier)
        return
    # Load user Fernet key
    result = await db.execute(select(User).where(User.id == user_id))
    user = result.scalar_one_or_none()
    if user is None or not user.encryption_key:
        logger.warning("memory_maintenance: mine_proactive_patterns no encryption_key user=%s", user_id)
        return
    fernet = Fernet(user.encryption_key.encode())
    cutoff = datetime.now(timezone.utc) - timedelta(days=_MINING_LOOKBACK_DAYS)
    episodes_result = await db.execute(
        select(MemoryEpisodic)
        .where(
            MemoryEpisodic.user_id == user_id,
            MemoryEpisodic.created_at >= cutoff,
        )
        .order_by(MemoryEpisodic.created_at.asc())
    )
    episode_rows = episodes_result.scalars().all()
    if len(episode_rows) < _MIN_EPISODES_FOR_MINING:
        logger.info(
            "memory_maintenance: mine_proactive_patterns skipped user=%s episodes=%d (< %d)",
            user_id, len(episode_rows), _MIN_EPISODES_FOR_MINING,
        )
        return
    summaries: list[str] = []
    undecryptable = 0
    for ep in episode_rows:
        try:
            plaintext = fernet.decrypt(ep.summary_encrypted.encode()).decode()
            summaries.append(plaintext)
        except Exception:
            # Best-effort: skip the episode, but count it instead of hiding it.
            undecryptable += 1
    if undecryptable:
        logger.warning(
            "memory_maintenance: mine_proactive_patterns user=%s skipped %d undecryptable episodes",
            user_id, undecryptable,
        )
    if not summaries:
        return
    patterns = await _extract_proactive_patterns(summaries)
    if not patterns:
        logger.info("memory_maintenance: mine_proactive_patterns user=%s no patterns extracted", user_id)
        return
    stored = 0
    for pattern_text in patterns:
        try:
            encrypted = fernet.encrypt(pattern_text.encode()).decode()
            row = MemoryProactive(
                id=str(uuid.uuid4()),
                user_id=user_id,
                pattern_encrypted=encrypted,
                confidence=0.7,
                source="inferred",
            )
            db.add(row)
            stored += 1
        except Exception as exc:
            logger.warning("memory_maintenance: failed to store pattern user=%s: %s", user_id, exc)
    try:
        await db.commit()
        logger.info(
            "memory_maintenance: mine_proactive_patterns user=%s stored=%d",
            user_id, stored,
        )
    except Exception as exc:
        logger.warning("memory_maintenance: mine_proactive_patterns commit failed user=%s: %s", user_id, exc)
        await db.rollback()
        return
    await _decay_proactive_patterns(db, user_id, fernet)
 async def _extract_proactive_patterns(summaries: list[str]) -> list[str]:
    """Ask the ``memory-miner`` LLM for recurring patterns in *summaries*.

    Only the last 20 summaries are sent.  The reply is split into non-empty,
    stripped lines and at most five of them are returned.  A failure of the LLM
    call or of the parsing is logged and yields an empty list.
    """
    from app.core.llm import get_agent_llm  # noqa: PLC0415

    miner = get_agent_llm("memory-miner", temperature=0)
    # cap at last 20 to control token usage
    combined = "\n---\n".join(summaries[-20:])
    prompt = (
        "You are analyzing conversation history for a personal AI secretary. "
        "Identify 3-5 recurring temporal or behavioral patterns (e.g. 'always works late on Thursdays', "
        "'prefers bullet-point summaries', 'frequently asks about Project Acme status'). "
        "Return each pattern as a plain, short English sentence on its own line. "
        "No numbering, no bullet points, no extra text.\n\n"
        f"Conversation history:\n{combined}"
    )
    try:
        reply = await miner.ainvoke(prompt)
        if hasattr(reply, "content"):
            body = reply.content
        else:
            body = str(reply)
        stripped_lines = (raw.strip() for raw in str(body).splitlines())
        found = [entry for entry in stripped_lines if entry]
        return found[:5]
    except Exception as failure:
        logger.warning("memory_maintenance: _extract_proactive_patterns LLM failed: %s", failure)
        return []
 async def _decay_proactive_patterns(db: AsyncSession, user_id: str, fernet: Fernet) -> None:
    """Decay the confidence of the user's proactive patterns and prune weak ones.

    The age of a row is measured from ``created_at`` (naive values are treated
    as UTC).  Each full ``_PROACTIVE_DECAY_PERIOD_DAYS`` multiplies confidence by
    ``_PROACTIVE_DECAY_FACTOR``; rows below ``_PROACTIVE_PRUNE_THRESHOLD`` are
    deleted.  A failed commit is logged and rolled back.  ``fernet`` is accepted
    but not used here.

    NOTE(review): the decayed value is written back while ``created_at`` never
    changes, so each later run multiplies the already-decayed value by the full
    factor again — confirm this is intended.
    """
    query = select(MemoryProactive).where(MemoryProactive.user_id == user_id)
    patterns = (await db.execute(query)).scalars().all()
    now = datetime.now(timezone.utc)
    pruned_count = 0
    decayed_count = 0
    for pattern in patterns:
        anchor = pattern.created_at
        if anchor is None:
            continue
        if anchor.tzinfo is None:
            anchor = anchor.replace(tzinfo=timezone.utc)
        age_days = (now - anchor).days
        if age_days < _PROACTIVE_DECAY_PERIOD_DAYS:
            # Less than one full period old: nothing to decay yet.
            continue
        full_periods = age_days // _PROACTIVE_DECAY_PERIOD_DAYS
        new_confidence = pattern.confidence * (_PROACTIVE_DECAY_FACTOR ** full_periods)
        if new_confidence < _PROACTIVE_PRUNE_THRESHOLD:
            await db.delete(pattern)
            pruned_count += 1
        else:
            pattern.confidence = new_confidence
            decayed_count += 1
    try:
        await db.commit()
        logger.info(
            "memory_maintenance: decay_proactive user=%s decayed=%d deleted=%d",
            user_id, decayed_count, pruned_count,
        )
    except Exception as failure:
        logger.warning("memory_maintenance: decay_proactive commit failed user=%s: %s", user_id, failure)
        await db.rollback()
 # ── Phase 7: weekly memory audit ──────────────────────────────────────────────
 # Fallback system prompt for the contradiction scan.  It is passed to
 # compile_prompt() with a single ``facts`` variable holding the numbered,
 # ID-tagged fact list.  The doubled braces around the JSON example are
 # presumably str.format-style escapes for literal braces — TODO confirm
 # against compile_prompt().
 _AUDIT_CONTRADICTIONS_FALLBACK = (
    "You are auditing a personal AI assistant's memory bank. "
    "Each fact has an ID in brackets. "
    "Find pairs that directly contradict each other "
    "(e.g. 'prefers morning meetings' vs 'never schedules before noon'). "
    "For each contradiction, pick the ID to DELETE (the older or less specific one). "
    'Return ONLY a valid JSON array, no markdown fences: '
    '[{{"delete": "<id>", "reason": "<one line>"}}]. '
    "If no contradictions, return [].\n\n"
    "Facts:\n{facts}"
 )
 # Fallback system prompt for relation-label canonicalization.  It has a single
 # ``labels`` placeholder; presumably it is rendered the same way as the
 # contradiction prompt, with doubled braces as escapes — TODO confirm.
 _AUDIT_CANONICALIZE_FALLBACK = (
    "You are auditing entity labels in a personal AI assistant's relational memory. "
    "These are names of people, companies, projects, or topics. "
    "Group labels that clearly refer to the same real-world entity "
    "(e.g. 'giulia', 'Giulia', 'Giulia R.' → canonical 'Giulia'). "
    "Return ONLY a valid JSON array, no markdown fences: "
    '[{{"canonical": "<best label>", "variants": ["<v1>", "<v2>"]}}]. '
    "Only include groups with at least one variant. Singletons: omit.\n\n"
    "Labels:\n{labels}"
 )
 async def audit_memory(db: AsyncSession, user_id: str) -> None:
    """Run the weekly memory audit for *user_id*.

    Two passes are performed by the inner routine:
    1. Contradiction scan: up to ``_AUDIT_MAX_FACTS`` associative rows are
       decrypted and sent to the ``memory-auditor`` LLM, and the rows it flags
       as contradicted are hard-deleted.
    2. Label canonicalization: near-duplicate subject/object labels in
       memory_relations are grouped and rewritten to one canonical form.

    Any exception is logged as a warning and suppressed, so this never raises.
    """
    try:
        await _audit_memory_inner(db=db, user_id=user_id)
    except Exception as failure:
        logger.warning("memory_maintenance: audit_memory failed user=%s: %s", user_id, failure)
 async def _audit_memory_inner(db: AsyncSession, user_id: str) -> None:
    """Build the user's Fernet cipher and run both audit passes in order.

    Returns early with a warning when the user row or its encryption key is
    missing.
    """
    lookup = await db.execute(select(User).where(User.id == user_id))
    user = lookup.scalar_one_or_none()
    has_key = user is not None and bool(user.encryption_key)
    if not has_key:
        logger.warning("memory_maintenance: audit_memory no encryption_key user=%s", user_id)
        return
    cipher = Fernet(user.encryption_key.encode())
    # The contradiction scan needs the cipher; label canonicalization does not.
    await _scan_associative_contradictions(db, user_id, cipher)
    await _canonicalize_relation_labels(db, user_id)
 async def _scan_associative_contradictions(
    db: AsyncSession,
    user_id: str,
    fernet: Fernet,
 ) -> None:
    """Decrypt associative facts, ask the LLM to flag contradictions, delete flagged rows.

    Args:
        db: Session used to load, delete and commit the rows.
        user_id: Owner of the facts being audited.
        fernet: Cipher used to decrypt ``content_encrypted``.

    At most ``_AUDIT_MAX_FACTS`` of the most recently updated rows are audited,
    and the scan is skipped when fewer than two facts can be decrypted.  An LLM
    or JSON-parsing failure is logged and ends the scan without deleting
    anything.  Only IDs that were part of the audited set are ever deleted, and
    each at most once.
    """
    result = await db.execute(
        select(MemoryAssociative)
        .where(MemoryAssociative.user_id == user_id)
        .order_by(MemoryAssociative.updated_at.desc())
        .limit(_AUDIT_MAX_FACTS)
    )
    rows = result.scalars().all()
    if len(rows) < 2:
        return
    id_to_text: dict[str, str] = {}
    # Keep the loaded ORM rows so flagged IDs can be deleted without re-querying.
    rows_by_id: dict[str, MemoryAssociative] = {}
    for row in rows:
        try:
            id_to_text[row.id] = fernet.decrypt(row.content_encrypted.encode()).decode()
        except Exception:
            # Undecryptable rows are left out of the audit rather than aborting it.
            continue
        rows_by_id[row.id] = row
    if len(id_to_text) < 2:
        return
    numbered = "\n".join(
        f"{i}. [{rid}] {text}" for i, (rid, text) in enumerate(id_to_text.items(), start=1)
    )
    template, prompt_obj = get_prompt_or_fallback(
        "memory_audit_contradictions", _AUDIT_CONTRADICTIONS_FALLBACK
    )
    system_text = compile_prompt(template, prompt_obj, facts=numbered)
    from app.core.llm import get_agent_llm, model_for_agent  # noqa: PLC0415
    from langchain_core.messages import HumanMessage, SystemMessage  # noqa: PLC0415
    messages = [
        SystemMessage(content=system_text),
        HumanMessage(content="Audit facts for contradictions."),
    ]
    try:
        llm = get_agent_llm("memory-auditor", temperature=0)
        lf = get_langfuse()
        if lf:
            with lf.start_as_current_observation(
                as_type="generation",
                name="memory-audit-contradictions",
                model=model_for_agent("memory-auditor"),
                prompt=prompt_obj,
                input=messages,
            ) as gen:
                response = await llm.ainvoke(messages)
                gen.update(output=response.content, usage=extract_usage(response))
        else:
            response = await llm.ainvoke(messages)
        text = response.content if hasattr(response, "content") else str(response)
        payload = text.strip()
        if payload.startswith("```"):
            # Models sometimes wrap JSON in a Markdown fence despite the prompt:
            # drop the opening fence line and the closing fence.
            payload = payload.partition("\n")[2].rsplit("```", 1)[0]
        deletions = json.loads(payload)
        if not isinstance(deletions, list):
            return
    except Exception as exc:
        logger.warning(
            "memory_maintenance: _scan_associative_contradictions LLM/parse failed user=%s: %s",
            user_id, exc,
        )
        return
    deleted = 0
    for item in deletions:
        if not isinstance(item, dict):
            continue
        rid = item.get("delete")
        if not rid:
            continue
        try:
            # pop() restricts deletions to audited rows and ignores repeated IDs.
            target = rows_by_id.pop(rid, None)
        except TypeError:
            # Unhashable value (e.g. a list) returned by the model: ignore it.
            continue
        if target is None:
            continue
        await db.delete(target)
        deleted += 1
        # NOTE(review): "reason" is model-generated and may paraphrase decrypted
        # fact content — confirm it is acceptable to log.
        logger.info(
            "memory_maintenance: audit deleted contradiction id=%s user=%s reason=%s",
            rid, user_id, item.get("reason", ""),
        )
    if deleted:
        try:
            await db.commit()
        except Exception as exc:
            logger.warning(
                "memory_maintenance: audit contradiction commit failed user=%s: %s", user_id, exc
            )
            await db.rollback()
            # Nothing was persisted, so do not report the rows as deleted.
            deleted = 0
    logger.info(
        "memory_maintenance: _scan_associative_contradictions user=%s deleted=%d", user_id, deleted
    )
 async def _canonicalize_relation_labels(db: AsyncSession, user_id: str) -> None:
    """Unify near-duplicate entity labels in the user's relation rows.
    Collects every subject/object label of the user's ``MemoryRelation`` rows,
    sends the first ``_AUDIT_MAX_LABELS`` of them (sorted) to the
    "memory-auditor" LLM, and expects a JSON list of
    ``{"canonical": str, "variants": [str, ...]}`` groups back. Every row whose
    subject or object label is a listed variant is rewritten to the canonical
    form and the change is committed. LLM/parse failures are logged and leave
    the rows untouched.
    Args:
        db: Session used for the read and for committing the relabelled rows.
        user_id: Owner of the relation rows.
    """
    # NOTE(review): relabelling can leave two rows with the same
    # (subject, predicate, object); no de-duplication happens here.
    result = await db.execute(
        select(MemoryRelation).where(MemoryRelation.user_id == user_id)
    )
    rows = result.scalars().all()
    if not rows:
        return
    all_labels: set[str] = set()
    for row in rows:
        all_labels.add(row.subject_label)
        all_labels.add(row.object_label)
    labels_list = sorted(all_labels)[:_AUDIT_MAX_LABELS]
    if len(labels_list) < 2:
        return
    labels_block = "\n".join(f"- {lbl}" for lbl in labels_list)
    template, prompt_obj = get_prompt_or_fallback(
        "memory_audit_canonicalize", _AUDIT_CANONICALIZE_FALLBACK
    )
    system_text = compile_prompt(template, prompt_obj, labels=labels_block)
    from app.core.llm import get_agent_llm, model_for_agent  # noqa: PLC0415
    from langchain_core.messages import HumanMessage, SystemMessage  # noqa: PLC0415
    llm = get_agent_llm("memory-auditor", temperature=0)
    lf = get_langfuse()
    messages = [
        SystemMessage(content=system_text),
        HumanMessage(content="Canonicalize entity labels."),
    ]
    try:
        if lf:
            # Record the call as a Langfuse generation when tracing is available.
            with lf.start_as_current_observation(
                as_type="generation",
                name="memory-audit-canonicalize",
                model=model_for_agent("memory-auditor"),
                prompt=prompt_obj,
                input=messages,
            ) as gen:
                response = await llm.ainvoke(messages)
                gen.update(output=response.content, usage=extract_usage(response))
        else:
            response = await llm.ainvoke(messages)
        text = response.content if hasattr(response, "content") else str(response)
        groups = json.loads(text.strip())
        if not isinstance(groups, list):
            return
    except Exception as exc:
        logger.warning(
            "memory_maintenance: _canonicalize_relation_labels LLM/parse failed user=%s: %s",
            user_id, exc,
        )
        return
    # Build variant → canonical map
    remap: dict[str, str] = {}
    for group in groups:
        if not isinstance(group, dict):
            continue
        canonical = group.get("canonical")
        variants = group.get("variants")
        # The payload shape is LLM-controlled: a non-string canonical would be
        # written straight into the label columns, and a bare string for
        # "variants" would be iterated character by character.
        if not isinstance(canonical, str) or not canonical:
            continue
        if not isinstance(variants, list):
            continue
        for v in variants:
            if isinstance(v, str) and v != canonical:
                remap[v] = canonical
    if not remap:
        return
    updated = 0
    for row in rows:
        changed = False
        if row.subject_label in remap:
            row.subject_label = remap[row.subject_label]
            changed = True
        if row.object_label in remap:
            row.object_label = remap[row.object_label]
            changed = True
        if changed:
            updated += 1
    if updated:
        try:
            await db.commit()
            logger.info(
                "memory_maintenance: _canonicalize_relation_labels user=%s updated=%d",
                user_id, updated,
            )
        except Exception as exc:
            logger.warning(
                "memory_maintenance: canonicalize commit failed user=%s: %s", user_id, exc
            )
            await db.rollback()
--- a/app/core/memory_middleware.py
+++ b/app/core/memory_middleware.py
@@ -0,0 +1,733 @@
 """Memory Middleware — enrich requests with memory context and store interactions.
 Four-tier memory model (MemGPT-style):
  core         — persistent key/value user preferences, always injected
  associative  — semantic similarity search via pgvector (top-k)
  episodic     — recent session summaries (last N)
  proactive    — behavioral patterns above confidence threshold
 All memory content is encrypted at rest using the per-user Fernet key
 stored in User.encryption_key. Decryption happens in-memory only.
 Usage:
    memory = MemoryMiddleware(db_session)
    context = await memory.enrich_context(user_id, message)
    # ... run agent ...
    await memory.store_episode(user_id, session_id, message, response)
 """
 from __future__ import annotations
 import asyncio
 import logging
 import uuid
 from datetime import datetime, timezone
 from typing import Any
 from cryptography.fernet import Fernet, InvalidToken
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.models import (
    ExtractionQueue,
    MemoryAssociative,
    MemoryCore,
    MemoryEpisodic,
    MemoryProactive,
    MemoryRelation,
    User,
 )
 logger = logging.getLogger(__name__)
 def _now() -> datetime:
    return datetime.now(timezone.utc)
 # Tuning constants
 # Row limit applied to both associative lookups (vector search and recency fallback).
 _ASSOCIATIVE_TOP_K = 5
 # Number of most recent episodic summaries loaded into the context.
 _EPISODIC_RECENT_N = 10
 # Minimum confidence (inclusive) a proactive pattern needs to be surfaced as a hint.
 _PROACTIVE_CONFIDENCE_THRESHOLD = 0.6
 class MemoryMiddleware:
    """Enrich orchestrator context with memory and persist interactions after."""
    def __init__(self, db: AsyncSession) -> None:
        """Store the database session this instance's queries and commits go through."""
        self._db = db
    # ── Public API ────────────────────────────────────────────────────────────
    async def enrich_context(
        self,
        user_id: str,
        message: str,
        trace_id: str | None = None,
        session_id: str | None = None,
    ) -> dict[str, Any]:
        """Collect every memory tier for *user_id* into one context dict.
        The result is meant to be injected into the orchestrator before the LLM
        call. An empty dict is returned when the user has no usable encryption
        key; otherwise the dict has these keys:
          core_memory        — {key: plaintext_value, ...}
          associative_memory — [plaintext_content, ...]  (vector top-k when the tier
                               allows it, otherwise the most recently updated rows)
          episodic_memory    — [plaintext_summary, ...]  (most recent N, restricted to
                               *session_id* when one is given)
          proactive_hints    — [plaintext_pattern, ...]  (at or above the threshold)
          relational_memory  — ["subject --predicate--> object", ...] (top 10; empty
                               unless the tier has relational memory)
        *trace_id* is only used to tag the log line.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return {}
        debug_info = await self._get_user_debug(user_id)
        user_tier: str = debug_info.get("tier") or "free"
        # Loaders are awaited one after another, in the order of the keys below.
        context: dict[str, Any] = {
            "core_memory": await self._load_core(user_id, fernet),
            "associative_memory": await self._load_associative(
                user_id, message, fernet, user_tier=user_tier
            ),
            "episodic_memory": await self._load_episodic(user_id, fernet, session_id=session_id),
            "proactive_hints": await self._load_proactive(user_id, fernet),
            "relational_memory": await self._load_relational(user_id, user_tier=user_tier),
        }
        logger.info(
            "memory: enrich_context trace=%s user=%s tier=%s core=%d associative=%d episodic=%d proactive=%d relational=%d",
            trace_id or "-",
            user_id,
            user_tier,
            len(context["core_memory"]),
            len(context["associative_memory"]),
            len(context["episodic_memory"]),
            len(context["proactive_hints"]),
            len(context["relational_memory"]),
        )
        return context
    async def store_episode(
        self,
        user_id: str,
        session_id: str,
        message: str,
        response: str,
        trace_id: str | None = None,
    ) -> None:
        """Persist one finished exchange as an encrypted episodic memory row.
        The stored summary is built without an LLM call: the first 200
        characters of *message* and of *response*, labelled and separated by a
        newline. Once the row is committed, extraction is handed over to
        ``_dispatch_extraction``. Nothing is stored when the user has no usable
        encryption key. If the commit step fails, the error is logged, the
        session is rolled back and no extraction is dispatched.
        *trace_id* is only used to tag the log line.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return
        summary = f"User: {message[:200]}\nAssistant: {response[:200]}"
        ciphertext = _encrypt(fernet, summary)
        # The id is generated up front so it is still at hand after the commit.
        episode_id: str = str(uuid.uuid4())
        self._db.add(
            MemoryEpisodic(
                id=episode_id,
                user_id=user_id,
                summary_encrypted=ciphertext,
                session_id=session_id,
            )
        )
        try:
            await self._db.commit()
            # NOTE(review): the tier lookup for the log line shares this try, so
            # a failure there is reported as a store failure as well.
            debug_info = await self._get_user_debug(user_id)
            logger.info(
                "memory: store_episode trace=%s user=%s tier=%s session=%s",
                trace_id or "-",
                user_id,
                debug_info.get("tier") or "free",
                session_id,
            )
        except Exception as exc:
            logger.error("memory: store_episode failed user=%s: %s", user_id, exc)
            await self._db.rollback()
            return
        # ── Dispatch extraction pipeline (Phase 2) ────────────────────────────
        await self._dispatch_extraction(
            user_id=user_id,
            episode_id=episode_id,
            last_user_msg=message,
            last_assistant_msg=response,
            session_id=session_id,
        )
    async def _dispatch_extraction(
        self,
        user_id: str,
        episode_id: str,
        last_user_msg: str,
        last_assistant_msg: str,
        session_id: str | None,
    ) -> None:
        """Route extraction to a realtime background task or the batch queue based on user tier.
        Tiers with the ``realtime_extraction`` feature get ``run_extraction``
        started as a background task on a fresh session; failures inside that
        task are logged, never raised. All other tiers get an ``ExtractionQueue``
        row referencing *episode_id*; a failed insert is logged and rolled back.
        Args:
            user_id: Owner of the episode.
            episode_id: Id of the stored episode; only used for the queue row.
            last_user_msg: User message handed to the realtime extraction.
            last_assistant_msg: Assistant reply handed to the realtime extraction.
            session_id: Conversation session id handed to the realtime extraction.
        """
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        tier = await tier_manager.get_tier(user_id, self._db)
        if tier_manager.check_feature(tier, "realtime_extraction"):
            # Pro/Power/Team: fire-and-forget in the background.
            # Must open a fresh session — request session closes after handler returns.
            from app.core.memory_extraction import run_extraction  # noqa: PLC0415
            from app.db import async_session  # noqa: PLC0415
            async def _task() -> None:
                try:
                    async with async_session() as fresh_db:
                        await run_extraction(
                            db=fresh_db,
                            user_id=user_id,
                            last_user_msg=last_user_msg,
                            last_assistant_msg=last_assistant_msg,
                            session_id=session_id,
                        )
                except Exception as exc:
                    logger.warning(
                        "memory: extraction task failed user=%s: %s", user_id, exc
                    )
            # The event loop keeps only weak references to tasks, so a task
            # nobody else references can be garbage-collected before it
            # finishes. Hold it in a class-level set (created on first use)
            # until it is done.
            pending = getattr(MemoryMiddleware, "_extraction_tasks", None)
            if pending is None:
                pending = set()
                MemoryMiddleware._extraction_tasks = pending
            task = asyncio.create_task(_task())
            pending.add(task)
            task.add_done_callback(pending.discard)
            logger.info("memory: realtime extraction dispatched user=%s", user_id)
        else:
            # Free tier: enqueue for daily batch cron.
            queue_row = ExtractionQueue(
                id=str(uuid.uuid4()),
                user_id=user_id,
                episode_id=episode_id,
            )
            self._db.add(queue_row)
            try:
                await self._db.commit()
                logger.info(
                    "memory: extraction enqueued (batch) user=%s episode=%s",
                    user_id,
                    episode_id,
                )
            except Exception as exc:
                logger.warning(
                    "memory: extraction queue insert failed user=%s: %s", user_id, exc
                )
                await self._db.rollback()
    async def update_core(self, user_id: str, key: str, value: str, trace_id: str | None = None) -> None:
        """Create or overwrite the core memory entry *key* for *user_id*.
        The value is encrypted with the user's Fernet key before it is written;
        nothing happens when the user has no usable key. Errors raised while
        committing (or while building the log line) are logged and rolled back
        rather than raised. *trace_id* is only used to tag the log line.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return
        ciphertext = _encrypt(fernet, value)
        lookup = select(MemoryCore).where(
            MemoryCore.user_id == user_id,
            MemoryCore.key == key,
        )
        row = (await self._db.execute(lookup)).scalar_one_or_none()
        if row is None:
            self._db.add(
                MemoryCore(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    key=key,
                    value_encrypted=ciphertext,
                )
            )
        else:
            row.value_encrypted = ciphertext
        try:
            await self._db.commit()
            debug_info = await self._get_user_debug(user_id)
            logger.info(
                "memory: update_core trace=%s user=%s tier=%s key=%s",
                trace_id or "-",
                user_id,
                debug_info.get("tier") or "-",
                key,
            )
        except Exception as exc:
            logger.error("memory: update_core failed user=%s key=%s: %s", user_id, key, exc)
            await self._db.rollback()
    async def list_core_blocks(self, user_id: str) -> list[dict[str, str]]:
        """List the user's core memory as ``{"label", "value"}`` blocks, ordered by key.
        Returns an empty list when the user has no usable encryption key;
        entries that fail to decrypt are left out.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return []
        stmt = (
            select(MemoryCore)
            .where(MemoryCore.user_id == user_id)
            .order_by(MemoryCore.key.asc())
        )
        records = (await self._db.execute(stmt)).scalars().all()
        blocks: list[dict[str, str]] = []
        for record in records:
            value = _safe_decrypt(fernet, record.value_encrypted)
            if value is None:
                continue
            blocks.append({"label": record.key, "value": value})
        logger.debug("memory: list_core_blocks user=%s count=%d", user_id, len(blocks))
        return blocks
    async def get_core_block(self, user_id: str, label: str) -> str | None:
        """Fetch the decrypted value of the core block *label*.
        Returns None when the user has no usable encryption key, when no such
        block exists, or when the stored value cannot be decrypted.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return None
        stmt = select(MemoryCore).where(
            MemoryCore.user_id == user_id,
            MemoryCore.key == label,
        )
        record = (await self._db.execute(stmt)).scalar_one_or_none()
        if record is None:
            logger.debug("memory: get_core_block user=%s label=%s found=0", user_id, label)
            return None
        plaintext = _safe_decrypt(fernet, record.value_encrypted)
        found = int(plaintext is not None)
        logger.debug("memory: get_core_block user=%s label=%s found=%d", user_id, label, found)
        return plaintext
    async def delete_core(self, user_id: str, label: str) -> bool:
        """Remove the core block *label* of *user_id*.
        Returns True once the deletion is committed; False when the block does
        not exist or when the commit fails (the failure is logged and rolled back).
        """
        stmt = select(MemoryCore).where(
            MemoryCore.user_id == user_id,
            MemoryCore.key == label,
        )
        record = (await self._db.execute(stmt)).scalar_one_or_none()
        if record is None:
            logger.debug("memory: delete_core user=%s label=%s found=0", user_id, label)
            return False
        await self._db.delete(record)
        removed = False
        try:
            await self._db.commit()
            logger.info("memory: delete_core user=%s label=%s", user_id, label)
            removed = True
        except Exception as exc:
            logger.error("memory: delete_core failed user=%s label=%s: %s", user_id, label, exc)
            await self._db.rollback()
        return removed
    async def append_core(self, user_id: str, label: str, content: str) -> None:
        """Add *content* as a new line at the end of the core block *label*.
        When no readable block exists yet, one is created holding just *content*.
        """
        existing = await self.get_core_block(user_id, label)
        if existing is not None:
            await self.update_core(user_id, label, f"{existing}\n{content}")
            logger.info("memory: append_core user=%s label=%s created=0", user_id, label)
            return
        await self.update_core(user_id, label, content)
        logger.info("memory: append_core user=%s label=%s created=1", user_id, label)
    async def replace_core(self, user_id: str, label: str, old: str, new: str) -> bool:
        """Swap the first occurrence of *old* for *new* inside the core block *label*.
        Returns False, without writing anything, when the block is missing or
        unreadable or does not contain *old*; returns True otherwise.
        """
        existing = await self.get_core_block(user_id, label)
        has_match = existing is not None and old in existing
        if not has_match:
            logger.debug("memory: replace_core user=%s label=%s changed=0", user_id, label)
            return False
        rewritten = existing.replace(old, new, 1)
        await self.update_core(user_id, label, rewritten)
        logger.info("memory: replace_core user=%s label=%s changed=1", user_id, label)
        return True
    async def store_associative(
        self,
        user_id: str,
        content: str,
        entity_type: str | None = None,
        entity_id: str | None = None,
    ) -> None:
        """Save *content* as an encrypted associative memory row.
        An embedding of the plaintext is stored alongside it only when the
        user's tier has the ``real_embeddings`` feature; otherwise the embedding
        column is left empty. Nothing is stored when the user has no usable
        encryption key. Commit errors are logged and rolled back rather than raised.
        """
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        from app.core.embeddings import embed_text  # noqa: PLC0415
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return
        ciphertext = _encrypt(fernet, content)
        debug_info = await self._get_user_debug(user_id)
        tier = debug_info.get("tier") or "free"
        vector: list[float] | None = (
            await embed_text(content)
            if tier_manager.check_feature(tier, "real_embeddings")
            else None
        )
        self._db.add(
            MemoryAssociative(
                id=str(uuid.uuid4()),
                user_id=user_id,
                content_encrypted=ciphertext,
                embedding=vector,
                entity_type=entity_type,
                entity_id=entity_id,
            )
        )
        try:
            await self._db.commit()
            logger.info(
                "memory: store_associative user=%s embedded=%s",
                user_id,
                vector is not None,
            )
        except Exception as exc:
            logger.error("memory: store_associative failed user=%s: %s", user_id, exc)
            await self._db.rollback()
    async def upsert_relation(
        self,
        user_id: str,
        subject: str,
        subject_type: str,
        predicate: str,
        object_: str,
        object_type: str,
        *,
        confidence: float = 0.7,
        source_episode_id: str | None = None,
        notes: str | None = None,
    ) -> None:
        """Insert or refresh the relation (subject, predicate, object_) of *user_id*.
        Skipped entirely when the user's tier lacks the ``relational_memory``
        feature. An existing row — matched on (user_id, subject_label,
        predicate, object_label) — gets its types, confidence and
        ``last_confirmed_at`` refreshed; otherwise a new row is added.
        Labels are stored as plaintext. *notes*, when given and a Fernet key is
        available, is stored encrypted. Commit errors are logged and rolled
        back rather than raised.
        """
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        debug_info = await self._get_user_debug(user_id)
        tier = debug_info.get("tier") or "free"
        if not tier_manager.check_feature(tier, "relational_memory"):
            logger.debug("memory: upsert_relation skipped (tier=%s no relational_memory)", tier)
            return
        # NOTE(review): this keeps the ciphertext as bytes, whereas _encrypt()
        # returns str — confirm which one the notes column expects.
        notes_encrypted: bytes | None = None
        if notes:
            fernet = await self._get_fernet(user_id)
            if fernet:
                notes_encrypted = fernet.encrypt(notes.encode())
        lookup = select(MemoryRelation).where(
            MemoryRelation.user_id == user_id,
            MemoryRelation.subject_label == subject,
            MemoryRelation.predicate == predicate,
            MemoryRelation.object_label == object_,
        )
        relation = (await self._db.execute(lookup)).scalar_one_or_none()
        if relation is None:
            self._db.add(
                MemoryRelation(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    subject_label=subject,
                    subject_type=subject_type,
                    predicate=predicate,
                    object_label=object_,
                    object_type=object_type,
                    confidence=confidence,
                    source_episode_id=source_episode_id,
                    notes_encrypted=notes_encrypted,
                )
            )
        else:
            relation.subject_type = subject_type
            relation.object_type = object_type
            relation.confidence = confidence
            relation.last_confirmed_at = _now()
            # Existing notes are kept unless new ones were supplied and encrypted.
            if notes_encrypted is not None:
                relation.notes_encrypted = notes_encrypted
        try:
            await self._db.commit()
            logger.info(
                "memory: upsert_relation user=%s subject=%s predicate=%s object=%s",
                user_id, subject, predicate, object_,
            )
        except Exception as exc:
            logger.error("memory: upsert_relation failed user=%s: %s", user_id, exc)
            await self._db.rollback()
    async def query_relations(
        self,
        user_id: str,
        subject: str | None = None,
        predicate: str | None = None,
        object_: str | None = None,
        limit: int = 20,
    ) -> list[MemoryRelation]:
        """Return up to *limit* relation rows of *user_id*, highest confidence first.
        *subject*, *predicate* and *object_* are exact-match filters; a filter
        left as None is not applied.
        """
        conditions = [MemoryRelation.user_id == user_id]
        optional_filters = (
            (MemoryRelation.subject_label, subject),
            (MemoryRelation.predicate, predicate),
            (MemoryRelation.object_label, object_),
        )
        for column, wanted in optional_filters:
            if wanted is not None:
                conditions.append(column == wanted)
        stmt = (
            select(MemoryRelation)
            .where(*conditions)
            .order_by(MemoryRelation.confidence.desc())
            .limit(limit)
        )
        found = await self._db.execute(stmt)
        return list(found.scalars().all())
    async def insert_archival(self, user_id: str, content: str, source: str = "manual") -> None:
        """Store *content* as an encrypted archival entry in the associative table.
        The row is written without an embedding and with *source* recorded in
        ``entity_type``. Nothing is stored when the user has no usable
        encryption key. Commit errors are logged and rolled back rather than raised.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return
        self._db.add(
            MemoryAssociative(
                id=str(uuid.uuid4()),
                user_id=user_id,
                content_encrypted=_encrypt(fernet, content),
                embedding=None,
                entity_type=source,
                entity_id=None,
            )
        )
        try:
            await self._db.commit()
            logger.info("memory: insert_archival user=%s source=%s", user_id, source)
        except Exception as exc:
            logger.error("memory: insert_archival failed user=%s: %s", user_id, exc)
            await self._db.rollback()
    async def search_archival(self, user_id: str, query: str, top_k: int = 5) -> list[str]:
        """Return archival entries whose plaintext contains *query* (case-insensitive).
        Only the 100 most recently updated associative rows are scanned, newest
        first, and scanning stops once ``max(top_k, 1)`` hits are collected. A
        blank query matches every readable row. Returns an empty list when the
        user has no usable encryption key.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return []
        stmt = (
            select(MemoryAssociative)
            .where(MemoryAssociative.user_id == user_id)
            .order_by(MemoryAssociative.updated_at.desc())
            .limit(100)
        )
        candidates = (await self._db.execute(stmt)).scalars().all()
        needle = query.strip().lower()
        wanted = max(top_k, 1)
        hits: list[str] = []
        for candidate in candidates:
            text = _safe_decrypt(fernet, candidate.content_encrypted)
            if text is None:
                continue
            if needle and needle not in text.lower():
                continue
            hits.append(text)
            if len(hits) >= wanted:
                break
        logger.info("memory: search_archival user=%s query=%s hits=%d", user_id, query[:80], len(hits))
        return hits
    async def search_recall(self, user_id: str, query: str, top_k: int = 5) -> list[str]:
        """Return episodic summaries whose plaintext contains *query* (case-insensitive).
        Only the 100 most recently created episodes are scanned, newest first,
        and scanning stops once ``max(top_k, 1)`` hits are collected. A blank
        query matches every readable summary. Returns an empty list when the
        user has no usable encryption key.
        """
        fernet = await self._get_fernet(user_id)
        if fernet is None:
            return []
        stmt = (
            select(MemoryEpisodic)
            .where(MemoryEpisodic.user_id == user_id)
            .order_by(MemoryEpisodic.created_at.desc())
            .limit(100)
        )
        episodes = (await self._db.execute(stmt)).scalars().all()
        needle = query.strip().lower()
        wanted = max(top_k, 1)
        hits: list[str] = []
        for episode in episodes:
            text = _safe_decrypt(fernet, episode.summary_encrypted)
            if text is None:
                continue
            if needle and needle not in text.lower():
                continue
            hits.append(text)
            if len(hits) >= wanted:
                break
        logger.info("memory: search_recall user=%s query=%s hits=%d", user_id, query[:80], len(hits))
        return hits
    # ── Private helpers ───────────────────────────────────────────────────────
    async def _get_fernet(self, user_id: str) -> Fernet | None:
        """Build a Fernet from the user's stored ``encryption_key``.
        Returns None, after logging a warning, when the user does not exist or
        has no key set.
        """
        lookup = await self._db.execute(select(User).where(User.id == user_id))
        account = lookup.scalar_one_or_none()
        if account is not None and account.encryption_key:
            return Fernet(account.encryption_key.encode())
        logger.warning("memory: no encryption_key for user=%s", user_id)
        return None
    async def _get_user_debug(self, user_id: str) -> dict[str, str | None]:
        """Resolve the user's effective tier and return it as ``{"tier": ...}``.
        Precedence: the tier of the user's subscription row, then "power" when
        ``settings.ENV`` is "dev", then ``User.tier``, then "free". The tier is
        None when the user does not exist.
        """
        from app.config.settings import settings  # noqa: PLC0415
        from app.models import Subscription  # noqa: PLC0415
        user_lookup = await self._db.execute(select(User).where(User.id == user_id))
        account = user_lookup.scalar_one_or_none()
        if account is None:
            return {"tier": None}
        tier_lookup = await self._db.execute(
            select(Subscription.tier).where(Subscription.user_id == user_id)
        )
        subscription_tier: str | None = tier_lookup.scalar_one_or_none()
        if subscription_tier:
            return {"tier": subscription_tier}
        if settings.ENV == "dev":
            return {"tier": "power"}
        return {"tier": account.tier or "free"}
    async def _load_core(self, user_id: str, fernet: Fernet) -> dict[str, str]:
        """Return the user's core memory as ``{key: plaintext}``; undecryptable entries are left out."""
        stmt = select(MemoryCore).where(MemoryCore.user_id == user_id)
        records = (await self._db.execute(stmt)).scalars().all()
        core: dict[str, str] = {}
        for record in records:
            value = _safe_decrypt(fernet, record.value_encrypted)
            if value is None:
                continue
            core[record.key] = value
        return core
    async def _load_associative(
        self, user_id: str, message: str, fernet: Fernet, *, user_tier: str = "free"
    ) -> list[str]:
        """Return up to ``_ASSOCIATIVE_TOP_K`` decrypted associative memories.
        Tiers with the ``real_embeddings`` feature: *message* is embedded and the
        rows that carry an embedding are ranked by cosine distance to it.
        Other tiers, a missing message embedding, or a failed vector query: the
        most recently updated rows are returned instead (*message* is not used).
        Rows that cannot be decrypted are dropped.
        """
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        from app.core.embeddings import embed_text  # noqa: PLC0415
        if tier_manager.check_feature(user_tier, "real_embeddings"):
            query_vec = await embed_text(message)
            if query_vec is not None:
                try:
                    ranked = await self._db.execute(
                        select(MemoryAssociative)
                        .where(
                            MemoryAssociative.user_id == user_id,
                            MemoryAssociative.embedding.isnot(None),
                        )
                        .order_by(MemoryAssociative.embedding.cosine_distance(query_vec))
                        .limit(_ASSOCIATIVE_TOP_K)
                    )
                    decrypted = (
                        _safe_decrypt(fernet, hit.content_encrypted)
                        for hit in ranked.scalars().all()
                    )
                    matches: list[str] = [text for text in decrypted if text is not None]
                    logger.info(
                        "memory: _load_associative user=%s mode=vector hits=%d",
                        user_id,
                        len(matches),
                    )
                    return matches
                except Exception as exc:
                    logger.warning(
                        "memory: vector search failed user=%s, falling back to keyword: %s",
                        user_id,
                        exc,
                    )
        # Fallback: most recently updated rows, regardless of the message.
        recent = await self._db.execute(
            select(MemoryAssociative)
            .where(MemoryAssociative.user_id == user_id)
            .order_by(MemoryAssociative.updated_at.desc())
            .limit(_ASSOCIATIVE_TOP_K)
        )
        decrypted = (
            _safe_decrypt(fernet, hit.content_encrypted)
            for hit in recent.scalars().all()
        )
        return [text for text in decrypted if text is not None]
    async def _load_episodic(
        self,
        user_id: str,
        fernet: Fernet,
        session_id: str | None = None,
    ) -> list[str]:
        """Return the ``_EPISODIC_RECENT_N`` newest decrypted episode summaries.
        When *session_id* is given only that session's episodes are considered.
        Summaries that cannot be decrypted are dropped.
        """
        conditions = [MemoryEpisodic.user_id == user_id]
        if session_id:
            conditions.append(MemoryEpisodic.session_id == session_id)
        stmt = (
            select(MemoryEpisodic)
            .where(*conditions)
            .order_by(MemoryEpisodic.created_at.desc())
            .limit(_EPISODIC_RECENT_N)
        )
        episodes = (await self._db.execute(stmt)).scalars().all()
        decrypted = (_safe_decrypt(fernet, ep.summary_encrypted) for ep in episodes)
        return [text for text in decrypted if text is not None]
    async def _load_relational(self, user_id: str, *, user_tier: str = "free") -> list[str]:
        """Render the user's 10 highest-confidence relations as ``"subject --predicate--> object"``.
        Returns an empty list when *user_tier* lacks the ``relational_memory`` feature.
        """
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        if not tier_manager.check_feature(user_tier, "relational_memory"):
            return []
        stmt = (
            select(MemoryRelation)
            .where(MemoryRelation.user_id == user_id)
            .order_by(MemoryRelation.confidence.desc())
            .limit(10)
        )
        relations = (await self._db.execute(stmt)).scalars().all()
        rendered: list[str] = []
        for rel in relations:
            rendered.append(f"{rel.subject_label} --{rel.predicate}--> {rel.object_label}")
        return rendered
    async def _load_proactive(self, user_id: str, fernet: Fernet) -> list[str]:
        """Return decrypted proactive patterns at or above the confidence threshold.
        Patterns are ordered by confidence, highest first; ones that cannot be
        decrypted are dropped.
        """
        stmt = (
            select(MemoryProactive)
            .where(
                MemoryProactive.user_id == user_id,
                MemoryProactive.confidence >= _PROACTIVE_CONFIDENCE_THRESHOLD,
            )
            .order_by(MemoryProactive.confidence.desc())
        )
        patterns = (await self._db.execute(stmt)).scalars().all()
        decrypted = (_safe_decrypt(fernet, p.pattern_encrypted) for p in patterns)
        return [text for text in decrypted if text is not None]
 # ── Encryption helpers ────────────────────────────────────────────────────────
 def _encrypt(fernet: Fernet, plaintext: str) -> str:
    return fernet.encrypt(plaintext.encode()).decode()
 def _safe_decrypt(fernet: Fernet, ciphertext: str) -> str | None:
    """Decrypt and return plaintext, or None on error (corrupted/wrong key)."""
    try:
        return fernet.decrypt(ciphertext.encode()).decode()
    except (InvalidToken, Exception) as exc:
        logger.warning("memory: decrypt failed: %s", exc)
        return None
--- a/app/core/note_summarizer.py
+++ b/app/core/note_summarizer.py
@@ -0,0 +1,51 @@
 """Note summarizer — generates a compact AI summary for a note.
 Called fire-and-forget from create_note / update_note tools so the
 ``notes.ai_summary`` column stays current without blocking the agent loop.
 """
 from __future__ import annotations
 import logging
 from langchain_core.messages import HumanMessage, SystemMessage
 from app.core.langfuse_client import get_prompt_or_fallback
 from app.core.llm import get_agent_llm
 logger = logging.getLogger(__name__)
 _FALLBACK_PROMPT = """\
 Summarize this note in <=250 characters. Be terse and dense.
 Keep proper nouns, dates, decisions, and action items.
 Do not start with "This note".
 Respond with the summary text only — no intro, no labels.
 Title: {title}
 Content: {content}"""
 _MAX_CONTENT_CHARS = 4000
 async def generate_note_summary(title: str, content: str) -> str:
    """Produce a summary of at most 250 characters for a note.
    The prompt template comes from ``get_prompt_or_fallback("note_summary", ...)``
    with ``_FALLBACK_PROMPT`` as the local fallback.  Only the first
    ``_MAX_CONTENT_CHARS`` characters of *content* are placed in the prompt.
    Returns the stripped model reply cut to 250 characters, or ``""`` when
    the reply content is not a string or the LLM call raises (the failure is
    logged as a warning).
    """
    prompt_template, _ = get_prompt_or_fallback("note_summary", _FALLBACK_PROMPT)
    system_prompt = prompt_template.format(
        title=title,
        content=content[:_MAX_CONTENT_CHARS],
    )
    try:
        reply = await get_agent_llm("note-summarizer").ainvoke(
            [
                SystemMessage(content=system_prompt),
                HumanMessage(content="Generate the summary."),
            ]
        )
        raw = reply.content
        if not isinstance(raw, str):
            # Non-string content is treated as "no summary".
            raw = ""
        return raw.strip()[:250]
    except Exception as exc:
        logger.warning("note_summarizer: failed to generate summary: %s", exc)
        return ""
--- a/app/core/output_formatter.py
+++ b/app/core/output_formatter.py
@@ -0,0 +1,63 @@
 """Output formatter for deep-agent stream events."""
 from __future__ import annotations
 import re
 from collections.abc import AsyncGenerator
 from typing import Any
 from app.schemas import WsStreamEnd, WsStreamStart, WsStreamText
 # Matches <canvas kind="...">...</canvas> blocks (single-line or multiline).
 _CANVAS_BLOCK_RE = re.compile(
    r'<canvas\s+kind=["\']([^"\']+)["\']>(.*?)</canvas>',
    re.DOTALL | re.IGNORECASE,
 )
 def extract_canvas_block(text: str) -> tuple[str, str | None, str | None]:
    """Split *text* into its visible part and the first embedded canvas block.
    Returns ``(visible_text, canvas_content, canvas_kind)``.  When *text*
    contains no ``<canvas kind="...">...</canvas>`` block it is returned
    unchanged (not stripped) and both canvas values are ``None``.  Otherwise
    the first block is cut out, and the remaining text, the block content and
    the kind are each stripped of surrounding whitespace.
    """
    found = _CANVAS_BLOCK_RE.search(text)
    if found is None:
        return text, None, None
    kind, content = (part.strip() for part in found.groups())
    before, after = text[: found.start()], text[found.end() :]
    return (before + after).strip(), content, kind
 WsFrame = WsStreamStart | WsStreamText | WsStreamEnd
 class StreamFormatter:
    """Turn ``(event_type, data)`` stream events into websocket frame models."""
    def __init__(self, request_id: str) -> None:
        # Stamped onto every frame this formatter yields.
        self.request_id = request_id
    async def format(
        self,
        event_stream: AsyncGenerator[tuple[str, Any], None],
    ) -> AsyncGenerator[WsFrame, None]:
        """Yield one start frame, a text frame per non-empty token, then an end frame.
        Events whose type is not ``"token"`` are ignored.  The start frame is
        emitted before the first token event, or just before the end frame if
        the stream produced no token events at all.
        """
        start_sent = False
        async for kind, payload in event_stream:
            if kind == "token":
                if not start_sent:
                    yield WsStreamStart(request_id=self.request_id)
                    start_sent = True
                chunk = str(payload) if payload else ""
                if chunk:
                    yield WsStreamText(request_id=self.request_id, chunk=chunk)
        if not start_sent:
            yield WsStreamStart(request_id=self.request_id)
        yield WsStreamEnd(request_id=self.request_id)
--- a/app/core/preprocessors/init.py
+++ b/app/core/preprocessors/init.py
@@ -0,0 +1,104 @@
 """Preprocessor registry: detect content type and dispatch to handlers.
 Public API
 ----------
 detect_content_type(filename, raw_content) -> str
    Heuristic detection based on file extension and content patterns.
 preprocess(content_type, raw_content) -> PreprocessResult
    Dispatch to the appropriate handler.
 """
 from __future__ import annotations
 import re
 from app.core.preprocessors.base import PreprocessResult
 # ── Heuristics ────────────────────────────────────────────────────────
 # Patterns that strongly suggest an email HTML file
 _EMAIL_SIGNALS = re.compile(
    r"(Subject:|From:|To:|Date:|Sent:|MIME-Version:|Content-Type:\s*text/html)",
    re.IGNORECASE,
 )
 # Patterns that suggest a generic HTML page (not an email).
 # NOTE(review): detect_content_type does not need this pattern to decide — an
 # HTML-like file without email signals is "generic_html" either way.  It is
 # kept so the module's names stay unchanged.
 _GENERIC_HTML_SIGNALS = re.compile(
    r"<(nav|main|header|footer|article|section)\b",
    re.IGNORECASE,
 )
 def detect_content_type(filename: str, raw_content: str) -> str:
    """Return a content-type string for the given file.
    Possible results: ``"email_html"``, ``"generic_html"``,
    ``"plain_text"``, ``"unknown"``.
    Decision table, keyed on the lower-cased extension of *filename*:
    * ``txt`` -> ``"plain_text"`` (content is not inspected).
    * ``html``/``htm``/``eml``/``mhtml``/``mht`` -> ``"email_html"`` when an
      email header signal occurs in the first 4096 characters of
      *raw_content*, otherwise ``"generic_html"``.
    * no extension -> ``"email_html"`` on the same signal test, otherwise
      ``"unknown"``.
    * any other extension -> ``"unknown"``.
    """
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if ext == "txt":
        return "plain_text"
    if ext in ("html", "htm", "eml", "mhtml", "mht"):
        fallback = "generic_html"
    elif not ext:
        fallback = "unknown"
    else:
        # NOTE(review): non-.txt text files are reported as "unknown", never
        # "plain_text" — confirm that this is the intended contract.
        return "unknown"
    # Only the head of the content is scanned for email header signals.
    if _EMAIL_SIGNALS.search(raw_content[:4096]):
        return "email_html"
    return fallback
 # ── Generic fallback handler ──────────────────────────────────────────
 def _preprocess_generic(raw_content: str, content_type: str) -> PreprocessResult:
    """Fallback handler: drop markup from *raw_content* and tidy blank lines.
    Uses BeautifulSoup's ``html.parser`` when ``bs4`` can be imported and a
    plain tag-stripping regex otherwise.  Runs of three or more newlines are
    collapsed to two and the text is stripped.  The result carries the given
    *content_type* and empty metadata.
    """
    try:
        from bs4 import BeautifulSoup
        stripped = BeautifulSoup(raw_content, "html.parser").get_text(separator="\n")
    except ImportError:
        # bs4 is unavailable — fall back to a simple regex tag strip.
        stripped = re.sub(r"<[^>]+>", "", raw_content)
    collapsed = re.sub(r"\n{3,}", "\n\n", stripped)
    return PreprocessResult(
        content_type=content_type,
        clean_text=collapsed.strip(),
        metadata={},
    )
 # ── Dispatch ──────────────────────────────────────────────────────────
 def preprocess(content_type: str, raw_content: str) -> PreprocessResult:
    """Run the handler that matches *content_type* on *raw_content*.
    ``"email_html"`` goes to ``preprocess_email_html``; every other content
    type is handled by the generic fallback.
    """
    if content_type != "email_html":
        return _preprocess_generic(raw_content, content_type)
    # Imported only when an email is actually processed.
    from app.core.preprocessors.email_html import preprocess_email_html
    return preprocess_email_html(raw_content)
 __all__ = ["detect_content_type", "preprocess", "PreprocessResult"]
--- a/app/core/preprocessors/base.py
+++ b/app/core/preprocessors/base.py
@@ -0,0 +1,25 @@
 """Base types for the preprocessor system."""
 from __future__ import annotations
 from dataclasses import dataclass, field
@dataclass
 class PreprocessResult:
    """Output of a preprocessor handler.
    Attributes
    ----------
    content_type:
        The detected content type (e.g. ``"email_html"``, ``"plain_text"``).
    clean_text:
        Human-readable text stripped of markup/binary noise.
    metadata:
        Dict of extracted metadata (keys vary by handler).
        Common keys: ``subject``, ``from``, ``to``, ``date``, ``filename``.
        Defaults to a new empty dict when not supplied.
    """
    content_type: str
    clean_text: str
    # default_factory builds a separate dict per instance instead of sharing one.
    metadata: dict = field(default_factory=dict)
--- a/app/core/preprocessors/email_html.py
+++ b/app/core/preprocessors/email_html.py
@@ -0,0 +1,111 @@
 """Preprocessor for email HTML files.
 Handles:
 - HTML stripping via BeautifulSoup
 - Metadata extraction (Subject, From, To, Date)
 - Thread splitting — isolates the latest reply
 """
 from __future__ import annotations
 import re
 from typing import TYPE_CHECKING
 from app.core.preprocessors.base import PreprocessResult
 if TYPE_CHECKING:
    pass
 # ── Thread split markers ──────────────────────────────────────────────
 # Matches patterns like:
 #   "On Mon, Apr 7, 2026 at 10:00 AM, Alice <alice@co.com> wrote:"
 #   "-----Original Message-----"
 #   "> " (plain-text quote prefix)
 _THREAD_PATTERNS = [
    re.compile(r"^On\s+.+wrote\s*:", re.IGNORECASE | re.MULTILINE),
    re.compile(r"^-{3,}\s*(original message|forwarded message)\s*-{3,}", re.IGNORECASE | re.MULTILINE),
    re.compile(r"^>{1,}\s+\S", re.MULTILINE),
    re.compile(r"^From:\s+.+\nSent:\s+", re.IGNORECASE | re.MULTILINE),
 ]
 # ── Metadata patterns (applied on raw HTML / plain fallback) ──────────
 _META_PATTERNS: dict[str, list[re.Pattern]] = {
    "subject": [
        re.compile(r"<title>(.+?)</title>", re.IGNORECASE | re.DOTALL),
        re.compile(r"Subject:\s*(.+)", re.IGNORECASE),
    ],
    "from": [
        re.compile(r'<meta[^>]+name=["\']?from["\']?[^>]+content=["\']([^"\']+)["\']', re.IGNORECASE),
        re.compile(r"From:\s*(.+)", re.IGNORECASE),
    ],
    "to": [
        re.compile(r'<meta[^>]+name=["\']?to["\']?[^>]+content=["\']([^"\']+)["\']', re.IGNORECASE),
        re.compile(r"To:\s*(.+)", re.IGNORECASE),
    ],
    "date": [
        re.compile(r'<meta[^>]+name=["\']?date["\']?[^>]+content=["\']([^"\']+)["\']', re.IGNORECASE),
        re.compile(r"Date:\s*(.+)", re.IGNORECASE),
        re.compile(r"Sent:\s*(.+)", re.IGNORECASE),
    ],
 }
 def _extract_metadata(raw_html: str, text: str) -> dict:
    """Collect subject/from/to/date values found in *raw_html* or *text*.
    For each key the patterns in ``_META_PATTERNS`` are tried in order, and
    each pattern is tried on *raw_html* before *text*.  The first hit wins;
    its first capture group, stripped, becomes the value.  Keys without any
    hit are absent from the returned dict.
    """
    found: dict[str, str] = {}
    for key, candidates in _META_PATTERNS.items():
        # Lazy generator keeps the original short-circuit: stop at first hit.
        attempts = (rx.search(raw_html) or rx.search(text) for rx in candidates)
        hit = next((m for m in attempts if m), None)
        if hit is not None:
            found[key] = hit.group(1).strip()
    return found
 def _split_thread(text: str) -> str:
    """Keep only the text that precedes the first quoted/forwarded marker.
    Each pattern in ``_THREAD_PATTERNS`` is searched once and the smallest
    match offset is used as the cut point.  When nothing matches, or the
    earliest marker sits at offset 0, the whole text is returned.  The
    result is stripped either way.
    """
    hits = (rx.search(text) for rx in _THREAD_PATTERNS)
    cut = min((hit.start() for hit in hits if hit), default=None)
    # ``cut`` is falsy both for "no marker" (None) and "marker at offset 0".
    if cut:
        return text[:cut].strip()
    return text.strip()
 def preprocess_email_html(raw_content: str) -> PreprocessResult:
    """Convert an email HTML document into clean text plus header metadata.
    Steps: parse the markup, drop ``style``/``script``/``head``/``noscript``
    elements, extract newline-separated text, collapse runs of blank lines,
    read metadata from the raw markup and the clean text, and keep only the
    part of the text before the first thread marker.
    Raises:
        ImportError: ``beautifulsoup4`` is not installed.
    """
    try:
        from bs4 import BeautifulSoup  # lazy import — optional dep
    except ImportError as exc:
        raise ImportError(
            "beautifulsoup4 is required for email_html preprocessing. "
            "Install it with: pip install beautifulsoup4"
        ) from exc
    # Try the lxml parser first; any failure falls back to html.parser.
    try:
        document = BeautifulSoup(raw_content, "lxml")
    except Exception:
        document = BeautifulSoup(raw_content, "html.parser")
    for noise in document(["style", "script", "head", "noscript"]):
        noise.decompose()
    body_text = re.sub(r"\n{3,}", "\n\n", document.get_text(separator="\n")).strip()
    # Metadata is read from the untouched markup first, then the clean text.
    headers = _extract_metadata(raw_content, body_text)
    return PreprocessResult(
        content_type="email_html",
        clean_text=_split_thread(body_text),
        metadata=headers,
    )
--- a/app/core/scout_registry.py
+++ b/app/core/scout_registry.py
@@ -0,0 +1,30 @@
 """Minimal agent base types retained for compatibility with batch runners."""
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import Any
 class BaseAgent(ABC):
    """Common base for non-chat agents still using the old base contract.
    Parameters
    ----------
    user_id:
        Identifier stored on the instance as ``user_id``.
    shared_memory:
        Dict stored on the instance as ``shared_memory``.  The object passed
        in is kept as-is (even when empty); a new dict is created only when
        ``None`` is given.
    vector_store_context:
        List stored as ``vector_store_context``, with the same ``None``
        handling as *shared_memory*.
    """
    def __init__(
        self,
        user_id: str = "",
        shared_memory: dict[str, Any] | None = None,
        vector_store_context: list[str] | None = None,
    ) -> None:
        self.user_id = user_id
        # Test against None, not truthiness: ``x or {}`` would swap a
        # caller-supplied *empty* container for a fresh one, so later writes
        # would no longer be visible through the caller's reference.
        self.shared_memory: dict[str, Any] = (
            {} if shared_memory is None else shared_memory
        )
        self.vector_store_context: list[str] = (
            [] if vector_store_context is None else vector_store_context
        )
    @abstractmethod
    def get_name(self) -> str:
        """Return the agent's name; must be implemented by subclasses."""
        ...
    @abstractmethod
    def get_description(self) -> str:
        """Return the agent's description; must be implemented by subclasses."""
        ...
    @property
    def skills(self) -> list[str]:
        """Skill names for this agent; the base implementation has none."""
        return []
--- a/app/core/scout_runner.py
+++ b/app/core/scout_runner.py
--- a/app/core/scout_session_buffer.py
+++ b/app/core/scout_session_buffer.py
@@ -0,0 +1,96 @@
 """In-process TTL buffer for per-session LangChain message history.
 Stores the full message list (including AIMessage with tool_calls and ToolMessage)
 keyed by (user_id, session_id), so agents can reconstruct tool-call context across
 conversation turns without it being lossy through the wire.
 Single-process only. For multi-worker deployments, replace the _SessionBuffer
 implementation with one backed by Redis (serialize LangChain messages to dicts via
 message_to_dict / messages_from_dict from langchain_core.messages).
 """
 from __future__ import annotations
 import time
 from threading import Lock
 from langchain_core.messages import BaseMessage
 SESSION_TTL_SECONDS = 1800  # 30-minute idle expiry
 MAX_MESSAGES_PER_SESSION = 80  # cap to avoid unbounded memory growth
 class _SessionBuffer:
    """In-memory message store keyed by ``(user_id, session_id)`` with idle expiry.
    Each slot holds ``(last_touched, messages)`` where ``last_touched`` is a
    ``time.monotonic()`` reading.  A slot idle for more than
    ``SESSION_TTL_SECONDS`` counts as expired.  Every public method takes
    ``self._lock`` before touching the store.
    """
    def __init__(self) -> None:
        self._store: dict[tuple[str, str], tuple[float, list[BaseMessage]]] = {}
        self._lock = Lock()
    def _evict_stale(self) -> None:
        """Delete every expired slot.  Callers invoke this while holding the lock."""
        now = time.monotonic()
        stale = [k for k, (ts, _) in self._store.items() if now - ts > SESSION_TTL_SECONDS]
        for k in stale:
            del self._store[k]
    def get(self, user_id: str, session_id: str) -> list[BaseMessage] | None:
        """Return a copy of the session's messages, or ``None`` if absent/expired.
        A successful read refreshes the slot's idle timer; an expired slot is
        deleted.
        """
        key = (user_id, session_id)
        with self._lock:
            entry = self._store.get(key)
            if entry is None:
                return None
            ts, msgs = entry
            now = time.monotonic()
            if now - ts > SESSION_TTL_SECONDS:
                del self._store[key]
                return None
            self._store[key] = (now, msgs)
            # Hand out a copy so callers cannot mutate the stored list.
            return list(msgs)
    def set(self, user_id: str, session_id: str, messages: list[BaseMessage]) -> None:
        """Replace the session's messages, keeping only the newest ones.
        At most ``MAX_MESSAGES_PER_SESSION`` trailing messages are stored.
        Expired slots of all sessions are evicted first.
        """
        key = (user_id, session_id)
        capped = messages[-MAX_MESSAGES_PER_SESSION:]
        with self._lock:
            self._evict_stale()
            self._store[key] = (time.monotonic(), capped)
    def clear(self, user_id: str, session_id: str) -> None:
        """Drop the session's slot; a missing slot is not an error."""
        with self._lock:
            self._store.pop((user_id, session_id), None)
    def append_system_message(self, user_id: str, session_id: str, text: str) -> None:
        """Append a synthetic system message to the buffer for the given session.
        Creates the session slot if it does not yet exist.  Used by the
        contextual_scope_update handler to inject navigation events without
        making an LLM call.
        An expired slot is treated as missing: stale entries are evicted
        before the lookup, so the new message is never appended to history
        that ``get`` would already have refused to return.
        """
        from langchain_core.messages import SystemMessage  # noqa: PLC0415
        key = (user_id, session_id)
        with self._lock:
            # Without this, an idle-expired slot would be revived with its old
            # messages and a fresh timestamp.
            self._evict_stale()
            entry = self._store.get(key)
            if entry is None:
                msgs: list[BaseMessage] = [SystemMessage(content=text)]
            else:
                _, existing = entry
                msgs = list(existing) + [SystemMessage(content=text)]
            capped = msgs[-MAX_MESSAGES_PER_SESSION:]
            self._store[key] = (time.monotonic(), capped)
 class ContextualBufferProxy:
    """Session-bound view of a ``_SessionBuffer``.
    Remembers one ``user_id``/``session_id`` pair so that callers can write
    ``proxy.append_system_message(text)`` without passing the identifiers at
    every call site.
    """
    def __init__(self, buf: "_SessionBuffer", user_id: str, session_id: str) -> None:
        self._buf = buf
        self._user_id = user_id
        self._session_id = session_id
    def append_system_message(self, text: str) -> None:
        """Forward *text* to the wrapped buffer for the bound user and session."""
        target_user, target_session = self._user_id, self._session_id
        self._buf.append_system_message(target_user, target_session, text)
 # Module-level singleton — same pattern as _pending_states in api/app/api/routes/auth.py
 session_buffer = _SessionBuffer()
--- a/app/core/ws_context.py
+++ b/app/core/ws_context.py
@@ -0,0 +1,115 @@
 """WebSocket client executor context.
 Holds a per-request async callback that tools call to execute CRUD
 operations on the Electron client's local SQLite / LanceDB databases.
 The callback sends a `tool_call` WS frame and awaits the `tool_result`.
 """
 from __future__ import annotations
 import re
 from contextvars import ContextVar
 from typing import Any, Callable, Coroutine
 from uuid import uuid4
 _SNAKE_TO_CAMEL_RE = re.compile(r"_([a-z])")
 def _key_to_camel(key: str) -> str:
    """Rewrite each ``_x`` pair (underscore + lowercase ASCII letter) in *key* as ``X``."""
    def _upper_letter(match: re.Match) -> str:
        return match.group(1).upper()
    return _SNAKE_TO_CAMEL_RE.sub(_upper_letter, key)
 def _keys_to_camel(obj: Any) -> Any:
    """Return a copy of *obj* with every dict key converted to camelCase.
    Dicts and lists are walked recursively and rebuilt; any other value is
    returned unchanged.
    Mirrors the JS-side ``toCamelCase`` applied to incoming WS frames in
    ``adiuvAI/src/main/api/backend-client.ts``. The Electron executor wraps
    tool_result payloads in ``toSnakeCase`` before sending; this restores the
    camelCase schema property names that the tool code expects to read.
    """
    if isinstance(obj, list):
        return [_keys_to_camel(item) for item in obj]
    if isinstance(obj, dict):
        converted = {}
        for name, value in obj.items():
            converted[_key_to_camel(name)] = _keys_to_camel(value)
        return converted
    return obj
 # Holds the execute callback for the current WS session.
 # Set by the chat WS handler before the orchestrator runs; cleared after.
 _client_executor: ContextVar[Callable[[dict], Coroutine[Any, Any, dict]]] = ContextVar(
    "_client_executor"
 )
 # Optional list that receives a record of each execute_on_client result.
 # Declared with a default of None, meaning "no collector registered".
 _tool_result_collector: ContextVar[list[dict] | None] = ContextVar(
    "_tool_result_collector", default=None
 )
 def set_tool_result_collector(lst: list[dict]) -> None:
    """Make *lst* the result collector in the current context."""
    _tool_result_collector.set(lst)
 def clear_tool_result_collector() -> None:
    """Unregister the result collector by setting the variable back to ``None``."""
    _tool_result_collector.set(None)
 def set_client_executor(fn: Callable[[dict], Coroutine[Any, Any, dict]]) -> None:
    """Install *fn* as the client executor in the current context."""
    _client_executor.set(fn)
 def clear_client_executor() -> None:
    """Drop the executor binding by storing ``None``; any error is ignored (best-effort)."""
    try:
        # None is outside the declared value type, hence the type-ignore.
        _client_executor.set(None)  # type: ignore[arg-type]
    except Exception:
        pass
 async def execute_on_client(
    action: str,
    table: str | None = None,
    data: dict[str, Any] | None = None,
    filters: dict[str, Any] | None = None,
    vector: list[float] | None = None,
    limit: int | None = None,
 ) -> dict[str, Any]:
    """Run one operation through the current client executor and return its result.
    Builds a payload with a fresh UUID ``id`` and the *action*, adds each
    optional argument that is not ``None`` (``filters`` entries whose value
    is ``None`` are dropped), awaits the executor callback with it, and
    converts the keys of the returned dict to camelCase.  If a result
    collector is registered, a ``{"action", "table", "data"}`` record is
    appended to it.
    Raises ``RuntimeError`` if no executor is set (i.e. called outside a WS session).
    """
    executor = _client_executor.get(None)
    if executor is None:
        raise RuntimeError(
            "execute_on_client() called outside a WebSocket session — "
            "no client executor is set."
        )
    cleaned_filters = (
        None
        if filters is None
        else {name: value for name, value in filters.items() if value is not None}
    )
    request: dict[str, Any] = {"id": str(uuid4()), "action": action}
    # Tuple order fixes the key order of the outgoing payload.
    optional_fields = (
        ("table", table),
        ("data", data),
        ("filters", cleaned_filters),
        ("vector", vector),
        ("limit", limit),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            request[field_name] = field_value
    response = _keys_to_camel(await executor(request))
    sink = _tool_result_collector.get(None)
    if sink is not None:
        sink.append({
            "action": action,
            "table": table,
            "data": response,
        })
    return response
--- a/shared/db.py
+++ b/shared/db.py
@@ -1,7 +1,15 @@
-"""Database engine, session factory, and declarative base.
+"""Database engine, session factory, and base model.
-All services use the async SQLAlchemy API via ``get_session()``.
+All app code uses the async SQLAlchemy API.  Alembic migrations use the
-Alembic migrations use the synchronous psycopg2 URL (see alembic/env.py).
+synchronous psycopg2 URL for the CLI (see alembic/env.py).
 Usage in routes:
    from app.db import get_session
    from sqlalchemy.ext.asyncio import AsyncSession
    async def my_route(db: AsyncSession = Depends(get_session)):
        result = await db.execute(select(User).where(User.email == email))
        user = result.scalar_one_or_none()
 """
 from __future__ import annotations
@@ -11,7 +19,7 @@ from collections.abc import AsyncGenerator
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
 from sqlalchemy.orm import DeclarativeBase
-from shared.config import settings
+from app.config.settings import settings
 engine = create_async_engine(
    settings.DATABASE_URL,
--- a/services/batch-agent/app/integrations/init.py
+++ b/services/batch-agent/app/integrations/init.py
@@ -1,11 +1,20 @@
 """Cloud provider integration utilities.
 Adapted for Batch Agent Service: import from shared.config instead of app.config.
 Provides:
-  * Shared message dataclasses (EmailMessage, ChatMessage)
+  * Shared message dataclasses (``EmailMessage``, ``ChatMessage``) used by
-  * get_provider() — factory for Gmail/MS Graph clients
+    both the Gmail and MS Graph clients and consumed by ``agent_runner``.
-  * encrypt_token() / decrypt_token() — Fernet-based OAuth token encryption
+  * ``get_provider()`` — factory that returns the correct client given a
    provider name and decrypted OAuth credentials dict.
  * ``encrypt_token()`` / ``decrypt_token()`` — Fernet-based at-rest
    encryption for OAuth tokens stored in ``cloud_agent_configs``.
 Encryption rationale
 --------------------
 Unlike user content (which is E2E-encrypted client-side and **never**
 decrypted server-side), OAuth tokens *must* be decrypted server-side
 because the backend makes provider API calls on behalf of the user.
 The Fernet key lives solely in ``OAUTH_ENCRYPTION_KEY`` env var — it
 is never returned to clients.
 """
 from __future__ import annotations
@@ -18,7 +27,7 @@ from typing import TYPE_CHECKING
 from cryptography.fernet import Fernet, InvalidToken
-from shared.config import settings
+from app.config.settings import settings
 if TYPE_CHECKING:
    from app.integrations.gmail import GmailClient
@@ -26,9 +35,13 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 # ── Shared message types ──────────────────────────────────────────────────
@dataclass
 class EmailMessage:
    """A single email message fetched from Gmail or Outlook."""
    id: str
    subject: str
    sender: str
@@ -38,6 +51,7 @@ class EmailMessage:
    @property
    def as_text(self) -> str:
        """Return a human-readable text representation for LLM extraction."""
        date_str = self.date.strftime("%Y-%m-%d %H:%M")
        labels_str = f" [{', '.join(self.labels)}]" if self.labels else ""
        return (
@@ -50,6 +64,8 @@ class EmailMessage:
@dataclass
 class ChatMessage:
    """A single Teams chat or channel message fetched from MS Graph."""
    id: str
    content: str
    sender: str
@@ -58,6 +74,7 @@ class ChatMessage:
    @property
    def as_text(self) -> str:
        """Return a human-readable text representation for LLM extraction."""
        date_str = self.date.strftime("%Y-%m-%d %H:%M")
        channel_str = f" [channel: {self.channel}]" if self.channel else ""
        return (
@@ -67,7 +84,15 @@ class ChatMessage:
        )
 # ── Fernet helpers ────────────────────────────────────────────────────────
 def _get_fernet() -> Fernet:
    """Return a ``Fernet`` instance using ``settings.OAUTH_ENCRYPTION_KEY``.
    Raises ``RuntimeError`` if ``OAUTH_ENCRYPTION_KEY`` is not set — callers
    must ensure this is configured before persisting OAuth tokens.
    """
    key = settings.OAUTH_ENCRYPTION_KEY
    if not key:
        raise RuntimeError(
@@ -78,6 +103,15 @@ def _get_fernet() -> Fernet:
 def encrypt_token(token_info: dict) -> str:
    """Fernet-encrypt an OAuth credential dict and return a base64 string.
    Stores the full ``{access_token, refresh_token, token_uri, client_id,
    client_secret, scopes, expiry}`` dict (or equivalent MSAL shape).
    Raises:
        RuntimeError: OAUTH_ENCRYPTION_KEY is not configured.
        ValueError: ``token_info`` is not a non-empty dict.
    """
    if not isinstance(token_info, dict) or not token_info:
        raise ValueError("token_info must be a non-empty dict")
    plaintext = json.dumps(token_info).encode("utf-8")
@@ -85,6 +119,13 @@ def encrypt_token(token_info: dict) -> str:
 def decrypt_token(encrypted: str) -> dict:
    """Decrypt a Fernet-encrypted token string and return the credential dict.
    Raises:
        RuntimeError: OAUTH_ENCRYPTION_KEY is not configured.
        ValueError: The encrypted string is invalid or was encrypted with a
            different key.
    """
    try:
        plaintext = _get_fernet().decrypt(encrypted.encode("utf-8"))
        return json.loads(plaintext)
@@ -92,10 +133,25 @@ def decrypt_token(encrypted: str) -> dict:
        raise ValueError(f"Failed to decrypt OAuth token: {exc}") from exc
 # ── Provider factory ──────────────────────────────────────────────────────
 def get_provider(
    provider: str,
    credentials_info: dict,
 ) -> "GmailClient | MSGraphClient":
    """Return the correct provider client for *provider*.
    Parameters
    ----------
    provider:
        One of ``"gmail"``, ``"outlook"``, ``"teams"``.
    credentials_info:
        Decrypted OAuth credential dict (Google or Microsoft shape).
    Raises:
        ValueError: Unknown provider name.
    """
    if provider == "gmail":
        from app.integrations.gmail import GmailClient
        return GmailClient(credentials_info)
--- a/services/batch-agent/app/integrations/gmail.py
+++ b/services/batch-agent/app/integrations/gmail.py
@@ -1,7 +1,26 @@
 """Gmail API client for cloud agent integration.
-Adapted for Batch Agent Service: import from app.integrations instead of
+Wraps the Google Gmail REST API to fetch email messages matching a
-app.integrations (same relative path within the service).
+``filter_config`` dict.  Uses the official ``google-api-python-client``
 library (synchronous) wrapped in ``asyncio.to_thread()`` to avoid
 blocking the event loop.
 Token refresh is handled transparently: when the stored access token has
 expired, ``google.auth.transport.requests.Request`` will use the refresh
 token to obtain a fresh one.  The caller is responsible for persisting
 any refreshed credentials back to ``CloudScoutConfig.oauth_token_encrypted``
 (see ``agent_runner.run_cloud_agent``).
 Credential dict shape (Google OAuth2):
    {
        "token": "<access_token>",
        "refresh_token": "<refresh_token>",
        "token_uri": "https://oauth2.googleapis.com/token",
        "client_id": "<client_id>",
        "client_secret": "<client_secret>",
        "scopes": ["https://www.googleapis.com/auth/gmail.readonly"],
        "expiry": "2025-01-01T00:00:00Z"  # optional ISO-8601
    }
 """
 from __future__ import annotations
@@ -19,8 +38,13 @@ from app.integrations import EmailMessage
 logger = logging.getLogger(__name__)
 # Gmail search date format — e.g. "after:2025/01/01"
 _GMAIL_DATE_FMT = "%Y/%m/%d"
 # Maximum characters of body text forwarded to the LLM.
 _BODY_TRUNCATE = 8_000
 # Maximum messages retrieved per run (prevents runaway quota usage).
 _MAX_MESSAGES = 200
@@ -28,9 +52,20 @@ def _build_gmail_query(
    filter_config: dict[str, Any] | None,
    since: datetime | None,
 ) -> str:
    """Build a Gmail search query string from *filter_config* and *since*.
    Supported ``filter_config`` keys:
        labels (list[str]):  Gmail label names, e.g. ``["INBOX", "work"]``
        senders (list[str]): Sender addresses or domains to include
        date_range (dict):   ``{from: "<YYYY-MM-DD>", to: "<YYYY-MM-DD>"}``
    A hard ``since`` date (from last run) always overrides ``date_range.from``
    when it is earlier.
    """
    parts: list[str] = []
    cfg = filter_config or {}
    # Labels — joined with OR when multiple given.
    labels: list[str] = cfg.get("labels", [])
    if labels:
        if len(labels) == 1:
@@ -39,14 +74,17 @@ def _build_gmail_query(
            label_expr = " OR ".join(f"label:{lbl}" for lbl in labels)
            parts.append(f"({label_expr})")
    # Senders — each prefixed with "from:".
    senders: list[str] = cfg.get("senders", [])
    for sender in senders:
        parts.append(f"from:{sender}")
    # Date range.
    date_range: dict = cfg.get("date_range", {})
    from_str: str | None = date_range.get("from")
    to_str: str | None = date_range.get("to")
    # Determine effective "from" date: most recent of filter_config.date_range.from and since.
    effective_since: datetime | None = since
    if from_str:
        try:
@@ -72,12 +110,18 @@ def _build_gmail_query(
 def _strip_html(raw_html: str) -> str:
    """Turn an HTML fragment into single-spaced plain text.
    Tags are replaced by spaces first, entities are decoded afterwards (so
    an escaped ``&lt;b&gt;`` survives as literal text), then every run of
    whitespace is collapsed to one space and the ends are trimmed.
    """
    text = html.unescape(re.sub(r"<[^>]+>", " ", raw_html))
    return re.sub(r"\s+", " ", text).strip()
 def _parse_body(payload: dict[str, Any]) -> str:
    """Recursively extract the plain-text body from a Gmail message payload.
    Prefers ``text/plain``; falls back to ``text/html`` (stripped of tags).
    Returns an empty string if no body can be extracted.
    """
    mime_type: str = payload.get("mimeType", "")
    body: dict = payload.get("body", {})
    parts: list[dict] = payload.get("parts", [])
@@ -95,6 +139,7 @@ def _parse_body(payload: dict[str, Any]) -> str:
            return _strip_html(raw)
        return ""
    # Multipart — prefer text/plain part, fall back to text/html.
    plain_fallback = ""
    for part in parts:
        part_mime = part.get("mimeType", "")
@@ -110,6 +155,7 @@ def _parse_body(payload: dict[str, Any]) -> str:
 def _parse_date(raw: str) -> datetime:
    """Parse an RFC 2822 email date header into a UTC ``datetime``."""
    try:
        parsed = email.utils.parsedate_to_datetime(raw)
        if parsed.tzinfo is None:
@@ -120,6 +166,16 @@ def _parse_date(raw: str) -> datetime:
 class GmailClient:
    """Fetch email messages from a Gmail account via the Gmail REST API.
    Parameters
    ----------
    credentials_info:
        Decrypted OAuth2 credential dict.  Must contain at minimum
        ``token`` (access token) or ``refresh_token`` + ``token_uri`` +
        ``client_id`` + ``client_secret``.
    """
    def __init__(self, credentials_info: dict[str, Any]) -> None:
        from google.oauth2.credentials import Credentials
@@ -144,20 +200,38 @@ class GmailClient:
            expiry=expiry,
        )
    # ── Public API ─────────────────────────────────────────────────────────
    async def fetch_messages(
        self,
        filter_config: dict[str, Any] | None = None,
        since: datetime | None = None,
    ) -> list[EmailMessage]:
        """Fetch emails matching *filter_config*, capped at ``_MAX_MESSAGES``.
        The Gmail search query is built from *filter_config* and *since*, then
        the blocking Google API work is delegated to ``_fetch_sync`` through
        ``asyncio.to_thread()`` so the event loop is never blocked.
        An expired access token is refreshed automatically by the worker;
        check ``self.refreshed_credentials`` afterwards to learn whether new
        credentials need to be persisted.
        """
        search_query = _build_gmail_query(filter_config, since)
        logger.debug("gmail: executing search query %r", search_query)
        fetched = await asyncio.to_thread(self._fetch_sync, search_query)
        return fetched
    @property
    def refreshed_credentials(self) -> dict[str, Any] | None:
        """Return updated credential dict if the access token was refreshed.
        If the credentials were refreshed during ``fetch_messages()``, returns
        a new dict that should be re-encrypted and written back to the DB.
        Returns ``None`` if no refresh occurred.
        """
        creds = self._credentials
        if not creds.valid and creds.expired:
            return None
        # Check whether the token changed from what was stored.
        if creds.token != self._credentials_info.get("token"):
            result = {
                "token": creds.token,
@@ -172,11 +246,15 @@ class GmailClient:
            return result
        return None
    # ── Internal sync worker ───────────────────────────────────────────────
    def _fetch_sync(self, query: str) -> list[EmailMessage]:
        """Synchronous worker — called inside ``asyncio.to_thread()``."""
        import googleapiclient.discovery
        import googleapiclient.errors
        from google.auth.transport.requests import Request
        # Refresh token if needed before building the service.
        if self._credentials.expired and self._credentials.refresh_token:
            try:
                self._credentials.refresh(Request())
@@ -186,8 +264,9 @@ class GmailClient:
        service = googleapiclient.discovery.build(
            "gmail", "v1", credentials=self._credentials, cache_discovery=False
        )
-        user_api = service.users()
+        user_api = service.users()  # type: ignore[attr-defined]
        # ── List matching message IDs ──────────────────────────────────────
        ids: list[str] = []
        page_token: str | None = None
        while len(ids) < _MAX_MESSAGES:
@@ -214,10 +293,12 @@ class GmailClient:
                break
        if not ids:
            logger.debug("gmail: no messages matched query %r", query)
            return []
        logger.info("gmail: fetching %d message(s)", len(ids))
        # ── Fetch individual message details ──────────────────────────────
        messages: list[EmailMessage] = []
        for msg_id in ids:
            try:
@@ -245,8 +326,10 @@ class GmailClient:
                    date=date,
                    labels=labels,
                ))
            except googleapiclient.errors.HttpError as exc:
                logger.warning("gmail: skipping message %s — HTTP error: %s", msg_id, exc)
            except Exception as exc:
-                logger.warning("gmail: skipping message %s: %s", msg_id, exc)
+                logger.warning("gmail: skipping message %s — unexpected error: %s", msg_id, exc)
        logger.info("gmail: returned %d message(s)", len(messages))
        return messages
--- a/services/batch-agent/app/integrations/ms_graph.py
+++ b/services/batch-agent/app/integrations/ms_graph.py
@@ -1,6 +1,24 @@
-"""Microsoft Graph API client for Outlook and Teams.
+"""Microsoft Graph API client for Outlook and Teams cloud agent integration.
-Adapted for Batch Agent Service: import settings from shared.config.
+Handles two data sources:
 * **Outlook email** (``provider="outlook"``) — ``fetch_emails()`` calls
  ``/me/messages`` with an OData ``$filter`` built from ``filter_config``.
 * **Teams messages** (``provider="teams"``) — ``fetch_messages()`` calls
  ``/me/chats/getAllMessages`` filtered by date.
 Authentication uses MSAL ``ConfidentialClientApplication`` to acquire a
 token from a stored refresh token.  The ``httpx.AsyncClient`` (already a
 project dependency) is used for all API calls.
 Credential dict shape (Microsoft OAuth2 / MSAL):
    {
        "access_token":  "<access_token>",
        "refresh_token": "<refresh_token>",
        "token_type":    "Bearer",
        "scope":         "Mail.Read ChannelMessage.Read.All offline_access",
        "expires_in":    3600
    }
 """
 from __future__ import annotations
@@ -12,19 +30,23 @@ from typing import Any
 import httpx
-from shared.config import settings
+from app.config.settings import settings
 from app.integrations import ChatMessage, EmailMessage
 logger = logging.getLogger(__name__)
 _GRAPH_BASE = "https://graph.microsoft.com/v1.0"
 # Max items fetched per run.
 _MAX_EMAILS = 200
 _MAX_MESSAGES = 200
 # Max characters of body forwarded to the LLM.
 _BODY_TRUNCATE = 8_000
 def _strip_html(raw: str) -> str:
    """Strip HTML tags and collapse whitespace."""
    no_tags = re.sub(r"<[^>]+>", " ", raw)
    import html as _html
    decoded = _html.unescape(no_tags)
@@ -32,6 +54,7 @@ def _strip_html(raw: str) -> str:
 def _odata_datetime(dt: datetime) -> str:
    """Format a datetime as an OData datetime literal (UTC, ISO 8601)."""
    utc = dt.astimezone(timezone.utc)
    return utc.strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -40,14 +63,29 @@ def _build_email_filter(
    filter_config: dict[str, Any] | None,
    since: datetime | None,
 ) -> str:
    """Build an OData ``$filter`` expression for the ``/me/messages`` endpoint.
    Supported ``filter_config`` keys:
        senders (list[str]):  Sender email addresses.
        date_range (dict):    ``{from: "<ISO-8601>", to: "<ISO-8601>"}``
        folders (list[str]):  Folder display names (not directly filterable
                              via OData, so ignored here — callers iterate
                              folder IDs separately if needed; listed for
                              completeness).
    A hard ``since`` date always overrides ``date_range.from`` when it is
    earlier.
    """
    clauses: list[str] = []
    cfg = filter_config or {}
    # Senders.
    senders: list[str] = cfg.get("senders", [])
    if senders:
        sender_clauses = [f"from/emailAddress/address eq '{s}'" for s in senders]
        clauses.append("(" + " or ".join(sender_clauses) + ")")
    # Date range.
    date_range: dict = cfg.get("date_range", {})
    from_str: str | None = date_range.get("from")
@@ -79,16 +117,33 @@ def _build_email_filter(
 class MSGraphClient:
    """Fetch emails and Teams messages via the Microsoft Graph REST API.
    Parameters
    ----------
    credentials_info:
        Decrypted MSAL credential dict.
    """
    def __init__(self, credentials_info: dict[str, Any]) -> None:
        self._credentials_info = credentials_info
        self._access_token: str = credentials_info.get("access_token", "")
        self._original_access_token: str = self._access_token
        self._refresh_token: str | None = credentials_info.get("refresh_token")
    # ── Token management ───────────────────────────────────────────────────
    def _auth_headers(self) -> dict[str, str]:
        return {"Authorization": f"Bearer {self._access_token}"}
    async def _refresh_access_token(self) -> None:
        """Use MSAL to exchange the refresh token for a fresh access token.
        Updates ``self._access_token`` and ``self._credentials_info`` in-place.
        Raises:
            RuntimeError: MSAL reports an auth error.
        """
        import msal
        app = msal.ConfidentialClientApplication(
@@ -109,6 +164,7 @@ class MSGraphClient:
            raise RuntimeError(f"MS Graph token refresh failed: {error}")
        self._access_token = result["access_token"]
        # MSAL may issue a new refresh token.
        if "refresh_token" in result:
            self._refresh_token = result["refresh_token"]
            self._credentials_info["refresh_token"] = result["refresh_token"]
@@ -116,10 +172,16 @@ class MSGraphClient:
    @property
    def refreshed_credentials(self) -> dict[str, Any] | None:
        """Return updated credential dict if the access token was refreshed.
        Returns ``None`` if no change was made.
        """
        if self._access_token != self._original_access_token:
            return {**self._credentials_info, "access_token": self._access_token}
        return None
    # ── HTTP helpers ───────────────────────────────────────────────────────
    async def _get(
        self,
        client: httpx.AsyncClient,
@@ -128,8 +190,10 @@ class MSGraphClient:
        *,
        retry_on_401: bool = True,
    ) -> dict[str, Any]:
        """GET *url* with auth; refresh token on 401 and retry once."""
        resp = await client.get(url, params=params, headers=self._auth_headers())
        if resp.status_code == 401 and retry_on_401 and self._refresh_token:
            logger.debug("ms_graph: 401 on %s — refreshing token", url)
            await self._refresh_access_token()
            resp = await client.get(url, params=params, headers=self._auth_headers())
        if resp.status_code == 429:
@@ -137,11 +201,22 @@ class MSGraphClient:
        resp.raise_for_status()
        return resp.json()
    # ── Public API ─────────────────────────────────────────────────────────
    async def fetch_emails(
        self,
        filter_config: dict[str, Any] | None = None,
        since: datetime | None = None,
    ) -> list[EmailMessage]:
        """Return up to ``_MAX_EMAILS`` Outlook messages matching *filter_config*.
        Parameters
        ----------
        filter_config:
            Optional dict with ``senders``, ``date_range``, ``folders`` keys.
        since:
            Hard lower-bound on email date (from last agent run).
        """
        odata_filter = _build_email_filter(filter_config, since)
        params: dict[str, Any] = {
            "$top": 50,
@@ -162,7 +237,7 @@ class MSGraphClient:
                    if len(emails) >= _MAX_EMAILS:
                        break
                url = data.get("@odata.nextLink", "")
-                params = {}
+                params = {}  # nextLink already contains encoded params.
        logger.info("ms_graph: fetched %d Outlook email(s)", len(emails))
        return emails
@@ -172,6 +247,13 @@ class MSGraphClient:
        filter_config: dict[str, Any] | None = None,
        since: datetime | None = None,
    ) -> list[ChatMessage]:
        """Return up to ``_MAX_MESSAGES`` Teams messages matching *filter_config*.
        Fetches from ``/me/chats/getAllMessages`` (personal + group chats).
        The ``filter_config.channels`` key is checked as a text-filter on
        the channel name post-fetch (the API doesn't support channel OData
        filter directly on ``getAllMessages``).
        """
        cfg = filter_config or {}
        channel_filter: list[str] = [c.lower() for c in cfg.get("channels", [])]
        params: dict[str, Any] = {"$top": 50}
@@ -186,9 +268,11 @@ class MSGraphClient:
                try:
                    data = await self._get(client, url, params if url.startswith(_GRAPH_BASE) else None)
                except httpx.HTTPStatusError as exc:
                    # getAllMessages requires specific licensing; degrade gracefully.
                    if exc.response.status_code in (403, 404):
                        logger.warning(
-                            "ms_graph: /me/chats/getAllMessages not available (%d)",
+                            "ms_graph: /me/chats/getAllMessages not available (%d) — "
                            "check Teams license or permissions",
                            exc.response.status_code,
                        )
                        break
@@ -208,6 +292,8 @@ class MSGraphClient:
        logger.info("ms_graph: fetched %d Teams message(s)", len(messages))
        return messages
    # ── Parsers ────────────────────────────────────────────────────────────
    @staticmethod
    def _parse_email(item: dict[str, Any]) -> EmailMessage:
        subject: str = item.get("subject", "(no subject)") or "(no subject)"
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,242 @@
 from contextlib import asynccontextmanager
 import logging
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.api.middleware.rate_limit import TierRateLimitMiddleware
 from app.api.middleware.sanitizer import SanitizerMiddleware
 from app.config.settings import settings
 # Root logger: INFO and above, formatted as "<time> <level> <logger>: <message>".
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
 )
 # SQLAlchemy's engine and pool loggers are limited to WARNING and above.
 logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
 logging.getLogger("sqlalchemy.pool").setLevel(logging.WARNING)
 async def _memory_audit_cron_tick() -> None:
    """Weekly cron: contradiction scan + label canonicalization for all users (Phase 7).
    All user ids are read in one short-lived session; ``audit_memory`` then
    runs for each user in a session of its own.  A failure for one user is
    logged as a warning and the loop moves on; any other ``Exception`` raised
    by the job is logged as a warning instead of being propagated.
    """
    import logging  # noqa: PLC0415
    log = logging.getLogger(__name__)
    log.info("memory audit cron tick: starting")
    try:
        from app.db import async_session  # noqa: PLC0415
        from app.core.memory_maintenance import audit_memory  # noqa: PLC0415
        from app.models import User  # noqa: PLC0415
        from sqlalchemy import select  # noqa: PLC0415
        # Collect the ids first so no session stays open across the audits.
        async with async_session() as session:
            rows = await session.execute(select(User.id))
            user_ids: list[str] = list(rows.scalars().all())
        for user_id in user_ids:
            try:
                async with async_session() as session:
                    await audit_memory(session, user_id)
            except Exception as err:
                log.warning("memory audit cron tick: audit_memory failed user=%s: %s", user_id, err)
        log.info("memory audit cron tick: done users=%d", len(user_ids))
    except Exception as err:
        log.warning("memory audit cron tick: failed: %s", err)
 async def _memory_cron_tick() -> None:
    """Hourly cron: drain Free-tier extraction queue + mine proactive patterns for Power+ users.
    The extraction queue is drained once, then every user id is visited and
    ``mine_proactive_patterns`` runs only for users whose tier passes the
    ``"proactive_mining"`` feature check.  Per-user failures are logged and
    skipped; any other ``Exception`` is logged as a warning, not propagated.
    """
    import logging  # noqa: PLC0415
    log = logging.getLogger(__name__)
    log.info("memory cron tick: starting")
    try:
        from app.db import async_session  # noqa: PLC0415
        from app.core.memory_maintenance import drain_extraction_queue, mine_proactive_patterns  # noqa: PLC0415
        from app.billing.tier_manager import tier_manager  # noqa: PLC0415
        from app.models import User  # noqa: PLC0415
        from sqlalchemy import select  # noqa: PLC0415
        async with async_session() as session:
            await drain_extraction_queue(session)
        # mine proactive patterns for every Power+ user
        async with async_session() as session:
            rows = await session.execute(select(User.id))
            user_ids: list[str] = list(rows.scalars().all())
        for user_id in user_ids:
            try:
                async with async_session() as session:
                    tier = await tier_manager.get_tier(user_id, session)
                    if not tier_manager.check_feature(tier, "proactive_mining"):
                        continue
                    await mine_proactive_patterns(session, user_id)
            except Exception as err:
                log.warning("memory cron tick: mine_proactive_patterns failed user=%s: %s", user_id, err)
        log.info("memory cron tick: done users=%d", len(user_ids))
    except Exception as err:
        log.warning("memory cron tick: failed: %s", err)
 async def _scout_cron_tick() -> None:
    """Every-15-min cron: poll enabled cloud scouts (cron-fallback; push is primary).
    A scout whose ``last_run_at`` is less than 300 seconds old is skipped, so
    a push notification and this fallback do not both fire within the same
    window.  A failed trigger is logged as a warning and the loop continues.
    """
    import logging  # noqa: PLC0415
    import uuid  # noqa: PLC0415
    from datetime import datetime, timezone  # noqa: PLC0415
    log = logging.getLogger(__name__)
    log.info("scout cron tick: starting")
    try:
        from app.db import async_session  # noqa: PLC0415
        from app.models import CloudScoutConfig  # noqa: PLC0415
        from app.scouts.engine import ScoutEngine  # noqa: PLC0415
        from sqlalchemy import select  # noqa: PLC0415
        enabled_query = select(CloudScoutConfig).where(
            CloudScoutConfig.enabled == True  # noqa: E712
        )
        async with async_session() as session:
            rows = await session.execute(enabled_query)
            scouts = rows.scalars().all()
        engine = ScoutEngine()
        triggered = 0
        for scout in scouts:
            # Rate-limit guard: push is primary; skip if ran within 5 minutes.
            last_run = scout.last_run_at
            if last_run:
                age_seconds = (datetime.now(tz=timezone.utc) - last_run).total_seconds()
                if age_seconds < 300:
                    continue
            try:
                await engine.trigger_scout(uuid.UUID(str(scout.id)))
                triggered += 1
            except Exception as err:
                log.warning("scout cron tick: trigger failed scout=%s: %s", scout.id, err)
        log.info("scout cron tick: done triggered=%d total=%d", triggered, len(scouts))
    except Exception as err:
        log.warning("scout cron tick: failed: %s", err)
 async def _scout_watch_renewal_tick() -> None:
    """Every-24-hour cron: re-issue Gmail users.watch for scouts expiring within 24h.
    Selects enabled Gmail scouts whose ``gmail_watch_expires_at`` is at or
    before now + 24 hours and calls the Gmail connector's ``renew_watch`` on
    each, committing once after the loop.  A failing renewal (including a
    missing or misconfigured connector) is logged with its traceback and the
    loop continues.
    """
    import logging  # noqa: PLC0415
    from datetime import datetime, timedelta, timezone  # noqa: PLC0415
    log = logging.getLogger(__name__)
    log.info("scout watch renewal tick: starting")
    try:
        from app.db import async_session  # noqa: PLC0415
        from app.models import CloudScoutConfig  # noqa: PLC0415
        from app.scouts.connectors.registry import get_connector  # noqa: PLC0415
        from sqlalchemy import select  # noqa: PLC0415
        threshold = datetime.now(tz=timezone.utc) + timedelta(hours=24)
        renewed = 0
        expiring_query = select(CloudScoutConfig).where(
            CloudScoutConfig.enabled == True,  # noqa: E712
            CloudScoutConfig.provider == "gmail",
            CloudScoutConfig.gmail_watch_expires_at <= threshold,
        )
        async with async_session() as session:
            rows = await session.execute(expiring_query)
            scouts = rows.scalars().all()
            for scout in scouts:
                try:
                    # Looked up per scout so a registry failure is handled
                    # like any other per-scout error.
                    gmail_connector = get_connector("gmail")
                    await gmail_connector.renew_watch(scout)
                    renewed += 1
                except Exception:
                    log.exception("scout watch renewal tick: renew failed scout=%s", scout.id)
            await session.commit()
        log.info("scout watch renewal tick: done renewed=%d", renewed)
    except Exception as err:
        log.warning("scout watch renewal tick: failed: %s", err)
@asynccontextmanager
 async def lifespan(app: FastAPI):
    # Startup: register source connectors.
    from app.scouts.connectors.gmail import GmailConnector  # noqa: PLC0415
    from app.scouts.connectors.registry import register_connector  # noqa: PLC0415
    register_connector(GmailConnector())
    # Startup: ensure agent tool modules are loaded.
    import app.agents  # noqa: F401
    scheduler = None
    if settings.SCHEDULER_ENABLED:
        from apscheduler.schedulers.asyncio import AsyncIOScheduler  # noqa: PLC0415
        scheduler = AsyncIOScheduler()
        scheduler.add_job(_memory_cron_tick, "interval", hours=1, id="memory_cron")
        scheduler.add_job(_memory_audit_cron_tick, "interval", weeks=1, id="memory_audit_cron")
        scheduler.add_job(
            _scout_cron_tick, "interval", minutes=15,
            id="scout_cron_tick", replace_existing=True,
        )
        scheduler.add_job(
            _scout_watch_renewal_tick, "interval", hours=24,
            id="scout_watch_renewal_tick", replace_existing=True,
        )
        scheduler.start()
        logging.getLogger(__name__).info("memory cron scheduler started (interval=1h)")
    yield
    if scheduler is not None:
        scheduler.shutdown(wait=False)
    # Shutdown: dispose SQLAlchemy connection pool
    from app.db import engine
    await engine.dispose()
 def create_app() -> FastAPI:
    """Assemble the FastAPI application: middleware, v1 routers, health route.
    Interactive docs are served at ``/docs`` only when ``settings.ENV`` is
    ``"dev"``; ReDoc is always disabled.
    """
    docs_path = "/docs" if settings.ENV == "dev" else None
    app = FastAPI(
        title="AdiuvAI Cloud API",
        version="0.1.0",
        docs_url=docs_path,
        redoc_url=None,
        lifespan=lifespan,
    )
    # Middleware stack (Starlette inserts at position 0, so last-added = outermost).
    # Request flow:  TierRateLimit → Sanitizer → CORS → Router
    # Response flow: Router → CORS → Sanitizer → TierRateLimit
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.CORS_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    app.add_middleware(SanitizerMiddleware)
    app.add_middleware(TierRateLimitMiddleware)
    from app.api.routes import scouts, auth, billing, chat, device_ws, memory, scout_webhooks
    # Routers are mounted in a fixed order under one shared prefix.
    api_prefix = "/api/v1"
    for routes_module in (auth, chat, billing, scouts, scout_webhooks, device_ws, memory):
        app.include_router(routes_module.router, prefix=api_prefix)
    @app.get("/api/v1/health", tags=["health"])
    async def health() -> dict:
        return {"status": "ok", "version": app.version}
    return app
 app = create_app()
--- a/shared/models.py
+++ b/shared/models.py
@@ -1,14 +1,20 @@
 """SQLAlchemy ORM models for all persistent tables.
-Centralized here so that Alembic migrations and all services share
+Only auth, billing, scout config, and memory data live here.
-the same model definitions.  Each service only queries the tables it owns.
+User content (notes, tasks, etc.) lives exclusively on the client.
-Ownership:
+Table inventory:
-  Auth Service      → users, refresh_tokens, subscriptions
+  users               — account credentials + tier
-  Chat Service      → memory_core, memory_associative, memory_episodic, memory_proactive
+  refresh_tokens      — hashed refresh token store
-  Batch Agent       → local_agent_configs, cloud_agent_configs, agent_run_logs
+  subscriptions       — Stripe subscription records
-  Billing Service   → subscriptions (shared write with Auth)
+  local_scout_configs — per-device batch scout configs
-  (excluded MVP)    → storage_records, backup_metadata, plugins, plugin_*, revenue_events
+  cloud_scout_configs — OAuth-backed cloud scout configs
  scout_run_logs      — execution history for all scouts
  memory_core         — per-user persistent key/value preferences (encrypted)
  memory_associative  — per-user semantic memory with embeddings (encrypted)
  memory_episodic     — per-user session summaries (encrypted)
  memory_proactive    — per-user behavioral patterns (encrypted)
  memory_relations    — per-user entity/relation graph (Mem0g-light, Phase 3)
 """
 from __future__ import annotations
@@ -16,8 +22,8 @@ from __future__ import annotations
 import uuid
 from datetime import datetime, timezone
 from pgvector.sqlalchemy import Vector
 from sqlalchemy import (
    BigInteger,
    Boolean,
    DateTime,
    Enum,
@@ -25,15 +31,17 @@ from sqlalchemy import (
    ForeignKey,
    Integer,
    JSON,
    LargeBinary,
    String,
    Text,
    UniqueConstraint,
    Uuid,
    func,
    text,
 )
 from sqlalchemy.orm import Mapped, mapped_column, relationship
-from shared.db import Base
+from app.db import Base
 # ── Helpers ──────────────────────────────────────────────────────────────
@@ -49,14 +57,12 @@ def _now() -> datetime:
 # ── Enum types ────────────────────────────────────────────────────────────
 # Module-level SQLAlchemy Enum types; each lists its allowed string values
 # and carries an explicit ``name``.
 TierEnum = Enum("free", "pro", "power", "team", name="billing_tier")
 PluginStatusEnum = Enum("pending_review", "approved", "rejected", name="plugin_status")
 ReviewDecisionEnum = Enum("approved", "rejected", name="review_decision")
 AgentTypeEnum = Enum("local", "cloud", name="agent_type")
 AgentStatusEnum = Enum("running", "success", "error", "partial", name="agent_run_status")
 CloudProviderEnum = Enum("gmail", "teams", "outlook", name="cloud_provider")
-# ── Auth models ───────────────────────────────────────────────────────────
+# ── Models ────────────────────────────────────────────────────────────────
 class User(Base):
@@ -68,13 +74,19 @@ class User(Base):
    email: Mapped[str] = mapped_column(String(255), unique=True, nullable=False, index=True)
    name: Mapped[str | None] = mapped_column(String(100), nullable=True)
    surname: Mapped[str | None] = mapped_column(String(100), nullable=True)
-    password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
+    password_hash: Mapped[str | None] = mapped_column(String(255), nullable=True)
    avatar_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    tier: Mapped[str] = mapped_column(TierEnum, nullable=False, default="free")
    stripe_customer_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    # Per-user Fernet key (base64-urlsafe, 44 chars). Generated on registration.
    # Used to encrypt/decrypt all memory rows for this user.
    encryption_key: Mapped[str | None] = mapped_column(String(64), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    onboarding_completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True, default=None
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
    )
@@ -85,6 +97,9 @@ class User(Base):
    subscription: Mapped[Subscription | None] = relationship(
        back_populates="user", uselist=False, cascade="all, delete-orphan"
    )
    oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
        back_populates="user", cascade="all, delete-orphan"
    )
 class RefreshToken(Base):
@@ -105,6 +120,25 @@ class RefreshToken(Base):
    user: Mapped[User] = relationship(back_populates="refresh_tokens")
 class OAuthAccount(Base):
    """ORM model for the ``oauth_accounts`` table.
    Each row ties one ``users`` row to an account at an external provider.
    NOTE(review): no unique constraint on (provider, provider_user_id) is
    declared in this class — confirm whether one is enforced elsewhere.
    """
    __tablename__ = "oauth_accounts"
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    provider: Mapped[str] = mapped_column(String(50), nullable=False)
    # Presumably the account id and email as reported by the provider — confirm.
    provider_user_id: Mapped[str] = mapped_column(String(255), nullable=False)
    provider_email: Mapped[str | None] = mapped_column(String(255), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    # Paired with the ``oauth_accounts`` relationship on ``User``.
    user: Mapped[User] = relationship(back_populates="oauth_accounts")
 class Subscription(Base):
    __tablename__ = "subscriptions"
@@ -126,161 +160,8 @@ class Subscription(Base):
    user: Mapped[User] = relationship(back_populates="subscription")
-# ── Storage models (excluded from MVP, kept for Alembic) ──────────────
+class LocalScoutConfig(Base):
-
+    __tablename__ = "local_scout_configs"
 class StorageRecord(Base):
    """ORM model for the ``storage_records`` table; every row belongs to one user."""
    __tablename__ = "storage_records"
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    table_name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Presumably the S3 object key plus a digest and byte size of the stored
    # payload — confirm against the writer.
    s3_key: Mapped[str] = mapped_column(String(500), nullable=False)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)
    # Both timestamps default to the database's now(); ``updated_at`` is also
    # reset to now() on UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
    )
 class BackupMetadata(Base):
    """ORM model for the ``backup_metadata`` table; every row belongs to one user."""
    __tablename__ = "backup_metadata"
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    s3_key: Mapped[str] = mapped_column(String(500), nullable=False)
    version: Mapped[int] = mapped_column(Integer, nullable=False)
    # NOTE(review): stored as a BigInteger rather than a DateTime — presumably
    # an epoch value; the unit is not visible here, confirm.
    timestamp: Mapped[int] = mapped_column(BigInteger, nullable=False)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
 # ── Plugin models (excluded from MVP, kept for Alembic) ───────────────
 class Plugin(Base):
    """ORM model for the ``plugins`` table.
    Parent of ``PluginInstallation``, ``PluginReview`` and ``RevenueEvent``
    rows; all three relationships cascade ``all, delete-orphan``.
    """
    __tablename__ = "plugins"
    # Unlike the UUID-keyed tables in this module, the primary key is a
    # caller-supplied string (no default is declared).
    id: Mapped[str] = mapped_column(String(255), primary_key=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=False, default="")
    version: Mapped[str] = mapped_column(String(50), nullable=False, default="1.0.0")
    # Nullable: the foreign key is ON DELETE SET NULL, so the plugin row
    # outlives its author's user row.
    author_id: Mapped[str | None] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="SET NULL"), nullable=True
    )
    author_name: Mapped[str] = mapped_column(String(255), nullable=False, default="")
    category: Mapped[str] = mapped_column(String(100), nullable=False, default="")
    price_cents: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Text column defaulting to "[]" — presumably a JSON-encoded list; confirm.
    permissions: Mapped[str] = mapped_column(Text, nullable=False, default="[]")
    status: Mapped[str] = mapped_column(PluginStatusEnum, nullable=False, default="pending_review")
    s3_package_key: Mapped[str | None] = mapped_column(String(500), nullable=True)
    install_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    avg_rating: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    rejection_reason: Mapped[str | None] = mapped_column(Text, nullable=True)
    submitted_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    installations: Mapped[list[PluginInstallation]] = relationship(
        back_populates="plugin", cascade="all, delete-orphan"
    )
    reviews: Mapped[list[PluginReview]] = relationship(
        back_populates="plugin", cascade="all, delete-orphan"
    )
    revenue_events: Mapped[list[RevenueEvent]] = relationship(
        back_populates="plugin", cascade="all, delete-orphan"
    )
 class PluginInstallation(Base):
    """ORM model for the ``plugin_installations`` table.
    Links one plugin to one user; the ``uq_plugin_user`` constraint allows
    at most one row per (plugin_id, user_id) pair.
    """
    __tablename__ = "plugin_installations"
    __table_args__ = (UniqueConstraint("plugin_id", "user_id", name="uq_plugin_user"),)
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Both foreign keys are declared with ON DELETE CASCADE.
    plugin_id: Mapped[str] = mapped_column(
        String(255), ForeignKey("plugins.id", ondelete="CASCADE"), nullable=False, index=True
    )
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    installed_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    # Paired with the ``installations`` relationship on ``Plugin``.
    plugin: Mapped[Plugin] = relationship(back_populates="installations")
 class PluginReview(Base):
    """ORM model for the ``plugin_reviews`` table: one review decision on a plugin."""
    __tablename__ = "plugin_reviews"
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Review rows are removed with their plugin (ON DELETE CASCADE).
    plugin_id: Mapped[str] = mapped_column(
        String(255), ForeignKey("plugins.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # Nullable: the foreign key is ON DELETE SET NULL, so the review row
    # outlives the reviewer's user row.
    reviewer_id: Mapped[str | None] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="SET NULL"), nullable=True
    )
    # One of the ``ReviewDecisionEnum`` values.
    decision: Mapped[str] = mapped_column(ReviewDecisionEnum, nullable=False)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    # NOTE(review): ``reviewed_at`` and ``created_at`` share the same server
    # default — confirm whether both columns are needed.
    reviewed_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    # Paired with the ``reviews`` relationship on ``Plugin``.
    plugin: Mapped[Plugin] = relationship(back_populates="reviews")
 class RevenueEvent(Base):
    """ORM model for the ``revenue_events`` table; each row references one plugin and one user."""
    __tablename__ = "revenue_events"
    # Primary key stored as a string UUID; ``_uuid`` supplies the default.
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
    # Both foreign keys are declared with ON DELETE CASCADE.
    plugin_id: Mapped[str] = mapped_column(
        String(255), ForeignKey("plugins.id", ondelete="CASCADE"), nullable=False, index=True
    )
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    # Monetary amounts are integers; the ``_cents`` suffix suggests minor
    # currency units — confirm.
    amount_cents: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    developer_share_cents: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    # Both nullable — presumably filled in once a payout has been made; confirm.
    stripe_transfer_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    paid_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    # Paired with the ``revenue_events`` relationship on ``Plugin``.
    plugin: Mapped[Plugin] = relationship(back_populates="revenue_events")
 # ── Agent models ──────────────────────────────────────────────────────────
 class LocalAgentConfig(Base):
    __tablename__ = "local_agent_configs"
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
@@ -293,6 +174,7 @@ class LocalAgentConfig(Base):
    directory_paths: Mapped[list] = mapped_column(JSON, nullable=False, default=list)
    data_types: Mapped[list] = mapped_column(JSON, nullable=False, default=list)
    prompt_template: Mapped[str] = mapped_column(Text, nullable=False, default="")
    scout_config: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    file_extensions: Mapped[list] = mapped_column(JSON, nullable=False, default=list)
    schedule_cron: Mapped[str] = mapped_column(String(100), nullable=False, default="0 */6 * * *")
    enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
@@ -304,17 +186,17 @@ class LocalAgentConfig(Base):
        DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
    )
-    run_logs: Mapped[list[AgentRunLog]] = relationship(
+    run_logs: Mapped[list["ScoutRunLog"]] = relationship(
-        back_populates="local_agent",
+        back_populates="local_scout",
-        primaryjoin="and_(AgentRunLog.agent_id == LocalAgentConfig.id, AgentRunLog.agent_type == 'local')",
+        primaryjoin="and_(ScoutRunLog.scout_id == LocalScoutConfig.id, ScoutRunLog.scout_type == 'local')",
-        foreign_keys="AgentRunLog.agent_id",
+        foreign_keys="ScoutRunLog.scout_id",
        cascade="all, delete-orphan",
-        overlaps="run_logs,cloud_agent",
+        overlaps="run_logs,cloud_scout",
    )
-class CloudAgentConfig(Base):
+class CloudScoutConfig(Base):
-    __tablename__ = "cloud_agent_configs"
+    __tablename__ = "cloud_scout_configs"
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
@@ -337,55 +219,97 @@ class CloudAgentConfig(Base):
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
    )
    auto_trash_spam: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default=text("false"))
    gmail_history_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
    gmail_watch_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    device_inactivity_pause_days: Mapped[int] = mapped_column(Integer, nullable=False, default=14, server_default="14")
-    run_logs: Mapped[list[AgentRunLog]] = relationship(
+    run_logs: Mapped[list["ScoutRunLog"]] = relationship(
-        back_populates="cloud_agent",
+        back_populates="cloud_scout",
-        primaryjoin="and_(AgentRunLog.agent_id == CloudAgentConfig.id, AgentRunLog.agent_type == 'cloud')",
+        primaryjoin="and_(ScoutRunLog.scout_id == CloudScoutConfig.id, ScoutRunLog.scout_type == 'cloud')",
-        foreign_keys="AgentRunLog.agent_id",
+        foreign_keys="ScoutRunLog.scout_id",
        cascade="all, delete-orphan",
-        overlaps="run_logs,local_agent",
+        overlaps="run_logs,local_scout",
    )
-class AgentRunLog(Base):
+class ScoutTriageQueue(Base):
-    __tablename__ = "agent_run_logs"
+    __tablename__ = "scout_triage_queue"
    __table_args__ = (
        UniqueConstraint("scout_id", "source_msg_ref", name="uq_scout_triage_queue_scout_msg"),
    )
    id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
    user_id: Mapped[str] = mapped_column(Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
    scout_id: Mapped[str] = mapped_column(Uuid(as_uuid=False), ForeignKey("cloud_scout_configs.id", ondelete="CASCADE"), nullable=False)
    source_type: Mapped[str] = mapped_column(String(50), nullable=False)
    source_msg_ref: Mapped[str] = mapped_column(String(255), nullable=False)
    triage_verdict: Mapped[str] = mapped_column(String(20), nullable=False)
    triage_reason: Mapped[str | None] = mapped_column(Text, nullable=True)
    status: Mapped[str] = mapped_column(String(20), nullable=False, default="queued", server_default="queued")
    triaged_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    delivered_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    acked_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    expires_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
 class ScoutRunLog(Base):
    __tablename__ = "scout_run_logs"
    id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), primary_key=True, default=_uuid
    )
-    agent_id: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
+    # Plain string — not a FK because it references either local_scout_configs or cloud_scout_configs
-    agent_type: Mapped[str] = mapped_column(AgentTypeEnum, nullable=False)
+    # depending on scout_type. Query by (scout_id, scout_type) to locate the source config.
    scout_id: Mapped[str] = mapped_column(String(255), nullable=False, index=True)
    scout_type: Mapped[str] = mapped_column(AgentTypeEnum, nullable=False)
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True
    )
    status: Mapped[str] = mapped_column(AgentStatusEnum, nullable=False, default="running")
    items_processed: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    items_created: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    tokens_used: Mapped[int] = mapped_column(Integer, nullable=False, default=0, server_default="0")
    errors: Mapped[list | None] = mapped_column(JSON, nullable=True)
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
-    local_agent: Mapped[LocalAgentConfig | None] = relationship(
+    local_scout: Mapped["LocalScoutConfig | None"] = relationship(
        back_populates="run_logs",
-        primaryjoin="and_(AgentRunLog.agent_id == LocalAgentConfig.id, AgentRunLog.agent_type == 'local')",
+        primaryjoin="and_(ScoutRunLog.scout_id == LocalScoutConfig.id, ScoutRunLog.scout_type == 'local')",
-        foreign_keys="AgentRunLog.agent_id",
+        foreign_keys="ScoutRunLog.scout_id",
-        overlaps="run_logs,cloud_agent",
+        overlaps="run_logs,cloud_scout",
    )
-    cloud_agent: Mapped[CloudAgentConfig | None] = relationship(
+    cloud_scout: Mapped["CloudScoutConfig | None"] = relationship(
        back_populates="run_logs",
-        primaryjoin="and_(AgentRunLog.agent_id == CloudAgentConfig.id, AgentRunLog.agent_type == 'cloud')",
+        primaryjoin="and_(ScoutRunLog.scout_id == CloudScoutConfig.id, ScoutRunLog.scout_type == 'cloud')",
-        foreign_keys="AgentRunLog.agent_id",
+        foreign_keys="ScoutRunLog.scout_id",
-        overlaps="run_logs,local_agent",
+        overlaps="run_logs,local_scout",
    )
-# ── Memory models ─────────────────────────────────────────────────────────
+class MonthlyTokenUsage(Base):
    __tablename__ = "monthly_token_usage"
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"), primary_key=True
    )
    year_month: Mapped[str] = mapped_column(String(7), primary_key=True)  # 'YYYY-MM'
    feature: Mapped[str] = mapped_column(String(64), primary_key=True)
    tokens_used: Mapped[int] = mapped_column(Integer, nullable=False, default=0, server_default="0")
 # ── Memory models ─────────────────────────────────────────────────────────────
 class MemoryCore(Base):
-    """Per-user persistent key/value preferences, encrypted at rest."""
+    """Per-user persistent key/value preferences, encrypted at rest.
    Examples: preferred_language, timezone, work_style.
    Decrypted in-memory only using User.encryption_key.
    """
    __tablename__ = "memory_core"
@@ -402,7 +326,11 @@ class MemoryCore(Base):
 class MemoryAssociative(Base):
-    """Per-user semantic memory: encrypted content + pgvector embedding."""
+    """Per-user semantic memory: encrypted content + pgvector embedding for similarity search.
    Production: ``embedding`` column is ``vector(1536)`` via pgvector.
    Tests (SQLite): stored as JSON list.
    """
    __tablename__ = "memory_associative"
@@ -412,7 +340,8 @@ class MemoryAssociative(Base):
        nullable=False, index=True,
    )
    content_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
-    embedding: Mapped[list | None] = mapped_column(JSON, nullable=True)
+    # vector(1536) via pgvector; SQLite tests use NULL embeddings so no dialect issue.
    embedding: Mapped[list | None] = mapped_column(Vector(1536), nullable=True)
    entity_type: Mapped[str | None] = mapped_column(String(100), nullable=True)
    entity_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    updated_at: Mapped[datetime] = mapped_column(
@@ -421,7 +350,10 @@ class MemoryAssociative(Base):
 class MemoryEpisodic(Base):
-    """Per-user session summaries, encrypted at rest."""
+    """Per-user session summaries, encrypted at rest.
    One row per session interaction; used to recall recent conversations.
    """
    __tablename__ = "memory_episodic"
@@ -438,7 +370,11 @@ class MemoryEpisodic(Base):
 class MemoryProactive(Base):
-    """Per-user inferred behavioral patterns, encrypted at rest."""
+    """Per-user inferred behavioral patterns, encrypted at rest.
    Confidence in [0.0, 1.0]; only patterns above threshold are injected.
    Source: 'inferred' (from episodes) or 'explicit' (user-stated).
    """
    __tablename__ = "memory_proactive"
@@ -453,3 +389,85 @@ class MemoryProactive(Base):
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
 class ExtractionQueue(Base):
    """Batch extraction queue for Free-tier users (Phase 2).

    Pro/Power/Team users get realtime asyncio.create_task() extraction.
    Free users get a queue row here; a daily cron (Phase 5) drains it.
    """
    __tablename__ = "extraction_queue"
    # Primary key: UUID kept as a string (as_uuid=False); the default is produced by _uuid.
    id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
    # Owning user; rows are removed with the user (ON DELETE CASCADE). Indexed.
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False, index=True,
    )
    # Optional UUID with no ForeignKey declared here.
    # NOTE(review): presumably refers to memory_episodic.id — confirm; as written the
    # database does not enforce that the referenced episode exists.
    episode_id: Mapped[str | None] = mapped_column(
        Uuid(as_uuid=False), nullable=True,
    )
    # Timezone-aware enqueue time, filled in by the database (server_default=now()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
 class MemoryRelation(Base):
    """Per-user entity/relation graph row (Mem0g-light, Phase 3).

    Each row is one (subject, predicate, object) edge owned by a user.
    subject_label/object_label are plaintext entity identifiers (not user content).
    notes_encrypted is optional Fernet-encrypted per-user commentary.
    confidence in [0.0, 1.0] — decays 5 % per 30 days since last_confirmed_at.
    """
    __tablename__ = "memory_relations"
    # Primary key: UUID kept as a string (as_uuid=False); the default is produced by _uuid.
    id: Mapped[str] = mapped_column(Uuid(as_uuid=False), primary_key=True, default=_uuid)
    # Owning user; rows are removed with the user (ON DELETE CASCADE). Indexed.
    user_id: Mapped[str] = mapped_column(
        Uuid(as_uuid=False), ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False, index=True,
    )
    # Edge endpoints and predicate. Length limits: labels 128, types 32, predicate 64.
    subject_label: Mapped[str] = mapped_column(String(128), nullable=False)
    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)
    predicate: Mapped[str] = mapped_column(String(64), nullable=False)
    object_label: Mapped[str] = mapped_column(String(128), nullable=False)
    object_type: Mapped[str] = mapped_column(String(32), nullable=False)
    # Python-side default of 0.7; the [0.0, 1.0] range is not enforced by this column.
    confidence: Mapped[float] = mapped_column(Float, nullable=False, default=0.7)
    # Episode the relation was derived from; set to NULL if that episode is deleted.
    source_episode_id: Mapped[str | None] = mapped_column(
        Uuid(as_uuid=False),
        ForeignKey("memory_episodic.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Optional encrypted commentary stored as raw bytes.
    notes_encrypted: Mapped[bytes | None] = mapped_column(LargeBinary, nullable=True)
    # Timezone-aware timestamps; created_at/updated_at default to now() in the database,
    # and updated_at is refreshed via onupdate=func.now().
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
    )
    # NULL until set; per the class docstring this is the reference point for confidence decay.
    last_confirmed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
 class Plugin(Base):
    """Plugin marketplace catalog entry."""
    __tablename__ = "plugins"
    id: Mapped[str] = mapped_column(String(255), primary_key=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=False)
    version: Mapped[str] = mapped_column(String(50), nullable=False)
    author_name: Mapped[str] = mapped_column(String(255), nullable=False)
    category: Mapped[str] = mapped_column(String(100), nullable=False)
    price_cents: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    permissions: Mapped[str] = mapped_column(Text, nullable=False, default="[]")
    status: Mapped[str] = mapped_column(String(50), nullable=False, default="pending")
    s3_package_key: Mapped[str | None] = mapped_column(String(500), nullable=True)
    install_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    avg_rating: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
--- a/app/schemas/init.py
+++ b/app/schemas/init.py
@@ -1,7 +1,6 @@
 """Pydantic schemas — API request/response contracts.
-Shared across all services. Mirrors the TypeScript types from
+Mirrors the TypeScript types from the Electron app (src/shared/api-types.ts).
 the Electron app (src/shared/api-types.ts).
 """
 from __future__ import annotations
@@ -31,6 +30,16 @@ class UserProfile(BaseModel):
    name: str | None = None
    surname: str | None = None
    tier: BillingTier
    avatar_url: str | None = None
    has_password: bool = True
    onboarding_completed_at: int | None = None  # epoch ms, null = not onboarded
    memory: dict[str, str] = Field(default_factory=dict)  # decrypted core memory k/v
 class OAuthAccountInfo(BaseModel):
    provider: str
    provider_email: str | None = None
    created_at: int  # epoch ms
 # ── Chat ─────────────────────────────────────────────────────────────
@@ -51,88 +60,6 @@ class ChatResponse(BaseModel):
    response: str
 # ── Backup ───────────────────────────────────────────────────────────
 class BackupMetadata(BaseModel):
    version: int
    timestamp: int
    checksum: str
    chunk_count: int
 # ── Cloud Storage (E2E encrypted blobs) ──────────────────────────────
 class StorageRecord(BaseModel):
    id: str
    user_id: str
    table: str
    blob: bytes
    checksum: str
    created_at: int
    updated_at: int
 class StorageRecordCreate(BaseModel):
    table: str
    blob: bytes
    checksum: str
 class StorageRecordUpdate(BaseModel):
    blob: bytes
    checksum: str
 # ── Cloud Vector Store (E2E encrypted vectors) ────────────────────────
 class VectorItem(BaseModel):
    id: str
    blob: bytes
    checksum: str
 class VectorUpsertRequest(BaseModel):
    vectors: list[VectorItem]
 class VectorSearchRequest(BaseModel):
    query_blob: bytes
    top_k: int = 10
 class VectorSearchResult(BaseModel):
    id: str
    score: float
    blob: bytes
 class VectorSearchResponse(BaseModel):
    results: list[VectorSearchResult]
 # ── Plugin Marketplace ────────────────────────────────────────────────
 class PluginManifest(BaseModel):
    id: str
    name: str
    description: str
    version: str
    author: str
    permissions: list[str]
    category: str
    price_cents: int = 0
 class PluginListResponse(BaseModel):
    plugins: list[PluginManifest]
    total: int
    page: int
 class PluginInstallRequest(BaseModel):
    plugin_id: str
 # ── WebSocket Frame Protocol ──────────────────────────────────────────
 class WsFrameType(str, Enum):
@@ -146,11 +73,9 @@ class WsFrameType(str, Enum):
    device_hello = "device_hello"
    # ── v3 frame types ─────────────────────────────────────────────────
    home_request = "home_request"
    floating_request = "floating_request"
    stream_start = "stream_start"
    stream_text = "stream_text"
    stream_end = "stream_end"
    floating_domain = "floating_domain"
    data_request = "data_request"
    data_response = "data_response"
    mutation = "mutation"
@@ -158,6 +83,24 @@ class WsFrameType(str, Enum):
    journey_start = "journey_start"
    journey_message = "journey_message"
    journey_reply = "journey_reply"
    # ── v5 brief frame types ──────────────────────────────────────────
    brief_request = "brief_request"
    # ── v6 task brief frame types ─────────────────────────────────────
    task_brief_request = "task_brief_request"
    # ── v7 folder index frame types ───────────────────────────────────
    index_session_start = "index_session_start"
    index_file_batch = "index_file_batch"
    index_session_cancel = "index_session_cancel"
    index_file_result = "index_file_result"
    index_session_progress = "index_session_progress"
    index_session_done = "index_session_done"
    # ── v8 contextual sidebar frame types ────────────────────────────
    contextual_request = "contextual_request"
    contextual_scope_update = "contextual_scope_update"
    contextual_scope_ack = "contextual_scope_ack"
    # ── v9 scout proposal frame types ────────────────────────────────
    SCOUT_PROPOSAL = "scout_proposal"
    SCOUT_PROPOSAL_ACK = "scout_proposal_ack"
 class WsToolCall(BaseModel):
@@ -207,16 +150,20 @@ class WsDeviceHello(BaseModel):
    type: Literal[WsFrameType.device_hello] = WsFrameType.device_hello
    device_id: str
-    agent_ids: list[str] = Field(default_factory=list)
+    scout_ids: list[str] = Field(default_factory=list)
 # ── WebSocket v3 Frame Models ─────────────────────────────────────────
-class WsFloatingScope(BaseModel):
+class FormatPrefsModel(BaseModel):
-    """Scope for a floating request."""
+    """User display preferences sent by Electron on each request."""
-    type: Literal["task", "project", "note", "timeline"]
+    timezone: str = "UTC"
-    id: str | None = None
+    date_format: str = "dd/MM/yyyy"
    time_format: str = "24h"
    locale: str = "en-US"
    now_iso: str = ""
 class WsHomeRequest(BaseModel):
@@ -225,14 +172,18 @@ class WsHomeRequest(BaseModel):
    type: Literal[WsFrameType.home_request] = WsFrameType.home_request
    message: str
    conversation_history: list[dict[str, Any]] = Field(default_factory=list)
    format_prefs: FormatPrefsModel | None = None
-class WsFloatingRequest(BaseModel):
+class WsBriefRequest(BaseModel):
-    """Client → Server: Floating chat message scoped to an entity."""
+    """Client → Server: Request a plain-text brief (home or project)."""
-    type: Literal[WsFrameType.floating_request] = WsFrameType.floating_request
+    type: Literal[WsFrameType.brief_request] = WsFrameType.brief_request
-    message: str
+    request_id: str | None = None
-    scope: WsFloatingScope
+    session_id: str | None = None
    mode: Literal["home", "project"]
    project_id: str | None = None
    format_prefs: FormatPrefsModel | None = None
 class WsStreamStart(BaseModel):
@@ -255,57 +206,65 @@ class WsStreamEnd(BaseModel):
    type: Literal[WsFrameType.stream_end] = WsFrameType.stream_end
    request_id: str
    error: str | None = None
    mutations: list[dict[str, Any]] | None = None
-class WsDomain(BaseModel):
+# ── Scout Config V2 ───────────────────────────────────────────────────
    """Structured floating domain payload for UI routing decisions."""
    type: Literal["task", "timeline", "project", "node"]
    id: str | None = None
    section: Literal["task", "timeline", "note"] | None = None
-class WsFloatingDomain(BaseModel):
+class ScoutContentTypeConfig(BaseModel):
-    """Server → Client: domain determined for a floating request."""
+    """Per-type extraction config produced by the journey chatbot."""
-    type: Literal[WsFrameType.floating_domain] = WsFrameType.floating_domain
+    id: str
-    request_id: str
+    label: str = ""
-    domain: WsDomain
+    detection_hint: str = ""
    preprocessing: str = "generic"  # handler name: "email_html", "plain_text", ...
    extraction_prompt: str
-# ── Agent Catalog ─────────────────────────────────────────────────────
+class ScoutConfig(BaseModel):
    """Structured scout configuration (replaces freeform prompt_template)."""
-class AgentCatalogItem(BaseModel):
+    content_types: list[ScoutContentTypeConfig] = []
    global_rules: list[str] = []
    data_types: list[str] = []
 # ── Scout Catalog ─────────────────────────────────────────────────────
 class ScoutCatalogItem(BaseModel):
    type: str
    name: str
    description: str
-class AgentCreationCheckRequest(BaseModel):
+class ScoutCreationCheckRequest(BaseModel):
    active_agents: int = Field(ge=0, default=0)
-class AgentCreationCheckResponse(BaseModel):
+class ScoutCreationCheckResponse(BaseModel):
    allowed: bool
    tier: BillingTier
    active_agents: int
    limit: int
-class AgentTriggerRequest(BaseModel):
+class ScoutTriggerRequest(BaseModel):
    directory: str = Field(min_length=1)
    device_id: str = Field(default="")
-    agent_id: str | None = None
+    agent_id: str | None = None  # FE stable agent ID (electron-store UUID)
    what_to_extract: list[str] = Field(min_length=1)
    actions_by_type: dict[str, list[str]] | None = None
    batch_interval: str = Field(min_length=1)
-    custom_agent_prompt: str = Field(min_length=1)
+    custom_agent_prompt: str | None = None
    agent_config: dict | None = None
    active_agents: int = Field(ge=0, default=0)
    last_run_at: int | None = None  # epoch ms from FE — enables incremental scanning
-# ── Agent Run Log ─────────────────────────────────────────────────────
+# ── Scout Run Log ─────────────────────────────────────────────────────
-class AgentRunLogResponse(BaseModel):
+class ScoutRunLogResponse(BaseModel):
    id: str
    agent_id: str
    agent_type: Literal["local", "cloud"]
@@ -315,3 +274,29 @@ class AgentRunLogResponse(BaseModel):
    errors: list[str]
    started_at: int
    completed_at: int | None
 # ── Chatbot Journey ───────────────────────────────────────────────────
 # ── Scout Proposal Frame Models ───────────────────────────────────────
 class ScoutProposalPayload(BaseModel):
    id: str
    scout_id: str
    source_type: str
    source_msg_ref: str
    raw_subject: str | None = None
    raw_snippet: str | None = None
    category: Literal["unprocessed"] = "unprocessed"
    payload: dict | None = None
 class ScoutProposalFrame(BaseModel):
    type: Literal[WsFrameType.SCOUT_PROPOSAL]
    proposal: ScoutProposalPayload
 class ScoutProposalAckFrame(BaseModel):
    type: Literal[WsFrameType.SCOUT_PROPOSAL_ACK]
    proposal_id: str
--- a/app/schemas/contextual.py
+++ b/app/schemas/contextual.py
@@ -0,0 +1,73 @@
 """Contextual sidebar scope schema and prompt block renderer.
 ContextualScope mirrors the TypeScript ContextualScope type sent by the
 Electron renderer when the user opens the side chat anchored to a specific
 view.  The renderer ships camelCase keys; Pydantic's alias_generator maps
 them to snake_case Python attributes automatically.
 """
 from __future__ import annotations
 from typing import Literal, Optional
 from pydantic import BaseModel, ConfigDict
 from pydantic.alias_generators import to_camel
 # Allowed values for ContextualScope.page — the renderer view the side chat is anchored to.
 PageType = Literal[
    "timeline",
    "tasks",
    "projects-list",
    "project",
    "note",
 ]
 # Allowed values for ContextualScope.entity_type — the kind of entity in focus, if any.
 EntityType = Literal["project", "note", "task", "timeline_event"]
 class ContextualScope(BaseModel):
    """Scope payload sent by the Electron renderer for contextual chat.

    The renderer ships camelCase keys (entityType, entityId, ...).  Pydantic's
    alias generator maps them to snake_case Python attrs.
    """
    # alias_generator=to_camel gives every field a camelCase alias;
    # populate_by_name=True additionally accepts the snake_case field names.
    model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
    # The only required field: which view the user is on.
    page: PageType
    # Entity in focus, when the view is anchored to one.
    entity_type: Optional[EntityType] = None
    entity_id: Optional[str] = None
    entity_name: Optional[str] = None
    project_id: Optional[str] = None
    # Read by render_scope_block for notes (rendered as "<n> characters").
    char_count: Optional[int] = None
    # Read by render_scope_block for projects via the keys "tasks", "notes", "milestones".
    counts: Optional[dict[str, int]] = None
    # Free-form dict; not read by render_scope_block.
    filters: Optional[dict] = None
 def render_scope_block(scope: ContextualScope) -> str:
    """Produce a single-paragraph human-readable summary of the current view
    for injection into the contextual agent system prompt.

    Never emits internal ids — only names.  The LLM is told to use names in
    prose; ids travel through tool calls.

    Args:
        scope: The scope payload describing the view the user has open.

    Returns:
        One sentence (two for projects) describing the view.  Entity-specific
        text takes precedence over page-specific text; any other combination
        falls back to ``"User is on page <page>."``.
    """
    # entity_name is optional.  Formatting a missing name with !r would put the
    # literal text "None" into the prompt as if it were the entity's name, so a
    # neutral phrase is used instead.
    if scope.entity_type == "project":
        counts = scope.counts or {}
        subject = (
            f"the project {scope.entity_name!r}"
            if scope.entity_name
            else "an unnamed project"
        )
        return (
            f"User is viewing {subject}. "
            f"{counts.get('tasks', 0)} tasks, "
            f"{counts.get('notes', 0)} notes, "
            f"{counts.get('milestones', 0)} milestones."
        )
    if scope.entity_type == "note":
        subject = (
            f"the note {scope.entity_name!r}"
            if scope.entity_name
            else "an untitled note"
        )
        return (
            f"User is viewing {subject} "
            f"({scope.char_count or 0} characters)."
        )
    if scope.page == "tasks":
        return "User is viewing the global Tasks list (all projects)."
    if scope.page == "timeline":
        return "User is viewing the global Timeline view."
    if scope.page == "projects-list":
        return "User is viewing the Projects list."
    return f"User is on page {scope.page}."
--- a/app/scouts/init.py
+++ b/app/scouts/init.py
--- a/app/scouts/connectors/init.py
+++ b/app/scouts/connectors/init.py
--- a/app/scouts/connectors/base.py
+++ b/app/scouts/connectors/base.py
@@ -0,0 +1,56 @@
 """Source connector Protocol and shared item types.
 A SourceConnector adapts a third-party data source (Gmail, Slack, ...) to the
 shared ScoutEngine interface. Each connector owns:
  * how to enumerate new items since the last poll (``list_new``)
  * how to fetch a single item's metadata cheaply (``fetch_metadata``)
  * how to fetch a single item's full content for in-memory triage
    (``fetch_content``) — this content MUST NOT be persisted by the engine
  * how to archive/trash an item (``archive``) for spam handling
  * optional push-notification setup (``setup_watch`` / ``renew_watch``)
 """
 from __future__ import annotations
 from datetime import datetime
 from typing import Literal, Protocol
 from pydantic import BaseModel, Field
 # Lightweight handle to one item in a source; produced by SourceConnector.list_new
 # and passed back to fetch_metadata / fetch_content / archive.
 class ItemRef(BaseModel):
    # Source-native identifier of the item (the Gmail connector stores the message id here).
    source_msg_ref: str
    # Optional arrival time; connectors may leave it unset.
    received_at: datetime | None = None
 # Cheap, body-free description of an item.  Every field is optional because a
 # source may not provide it.
 class ItemMetadata(BaseModel):
    subject: str | None = None
    sender: str | None = None
    # Short preview text supplied by the source.
    snippet: str | None = None
    received_at: datetime | None = None
 # Full content of an item, fetched for in-memory triage.  Per the module docstring
 # this content must not be persisted by the engine.
 class ItemContent(BaseModel):
    metadata: ItemMetadata
    # Body as plain text.
    body_text: str
    # Header name -> value; defaults to a fresh empty dict per instance.
    raw_headers: dict[str, str] = Field(default_factory=dict)
 # Outcome of triaging one item.
 class TriageVerdict(BaseModel):
    # Only these two outcomes are accepted by validation.
    verdict: Literal["relevant", "spam"]
    # Free-text justification for the verdict.
    reason: str
    # Validated to lie within [0.0, 1.0] (ge/le constraints).
    confidence: float = Field(ge=0.0, le=1.0)
 class SourceConnector(Protocol):
    """Adapter for a third-party data source (Gmail, Slack, ...).

    Structural interface: any object exposing ``source_type`` and these async
    methods satisfies it.  ``scout`` is left untyped here; it is the scout
    configuration object the engine passes in.
    """
    source_type: str  # e.g. "gmail"
    # Enumerate items that appeared since the last poll.
    async def list_new(self, scout) -> list[ItemRef]: ...
    # Fetch a single item's metadata cheaply (no body).
    async def fetch_metadata(self, scout, ref: ItemRef) -> ItemMetadata: ...
    # Fetch a single item's full content for in-memory triage; must not be persisted.
    async def fetch_content(self, scout, ref: ItemRef) -> ItemContent: ...
    # Archive/trash an item (used for spam handling).
    async def archive(self, scout, ref: ItemRef) -> None: ...
    # Push-notification setup and renewal.
    async def setup_watch(self, scout) -> None: ...
    async def renew_watch(self, scout) -> None: ...
--- a/app/scouts/connectors/gmail.py
+++ b/app/scouts/connectors/gmail.py
@@ -0,0 +1,213 @@
 """Gmail SourceConnector — talks to the Gmail API through the Google API client.
 Responsibilities:
  * list_new: incremental fetch since the scout's stored gmail_history_id
  * fetch_metadata: subject + sender + snippet only (Gmail metadata format)
  * fetch_content: full body text — transient, never persisted by engine
  * archive: move a message to Gmail Trash (recoverable for 30 days)
  * setup_watch / renew_watch: Gmail push notifications via Pub/Sub
 """
 from __future__ import annotations
 import asyncio
 import logging
 from datetime import datetime, timezone
 from app.config.settings import settings
 from app.integrations import decrypt_token
 from app.scouts.connectors.base import ItemContent, ItemMetadata, ItemRef
 logger = logging.getLogger(__name__)
 def _extract_plain_text_body(payload: dict) -> str:
    """Return the best plain-text rendering of a Gmail message payload.

    The whole MIME tree is searched for a non-empty ``text/plain`` part first.
    Only when none exists is the first non-empty ``text/html`` part used, with
    its tags replaced by spaces (a rudimentary strip, not an HTML parser).
    Searching in two passes means a ``text/plain`` alternative is preferred
    even when the sender lists the HTML part before it.

    Args:
        payload: A Gmail ``payload`` (message part) dict with ``mimeType``,
            optional ``body.data`` (base64url) and optional ``parts``.

    Returns:
        The decoded text, or ``""`` when no text part with data is found.
    """
    import base64
    import re

    def _decode(part: dict) -> str:
        # Decode this part's base64url body; empty string when it has no data.
        data = part.get("body", {}).get("data", "")
        if not data:
            return ""
        # Appending "==" tolerates bodies whose padding was stripped;
        # surplus padding is ignored by the decoder.
        return base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace")

    def _find(part: dict, wanted: str) -> str:
        # Depth-first search for the first non-empty part of MIME type `wanted`.
        mime_type = part.get("mimeType", "")
        if mime_type == wanted:
            return _decode(part)
        if mime_type.startswith("multipart/"):
            for child in part.get("parts", []):
                text = _find(child, wanted)
                if text:
                    return text
        return ""

    plain = _find(payload, "text/plain")
    if plain:
        return plain
    html = _find(payload, "text/html")
    if html:
        return re.sub(r"<[^>]+>", " ", html)
    return ""
 def _get_gmail_service(scout):
    """Build a blocking Gmail v1 API client from the scout's stored OAuth token.

    ``scout.oauth_token_encrypted`` is decrypted with ``decrypt_token`` and the
    resulting mapping is read with ``.get`` for each credential field; only
    ``token_uri`` has a fallback (Google's standard token endpoint).  Discovery
    document caching is disabled.  The returned client performs synchronous
    HTTP, so callers in this module invoke it from a worker thread.
    """
    from google.oauth2.credentials import Credentials
    from googleapiclient.discovery import build

    token_data = decrypt_token(scout.oauth_token_encrypted)
    credential_kwargs = {
        "token": token_data.get("token"),
        "refresh_token": token_data.get("refresh_token"),
        "token_uri": token_data.get("token_uri", "https://oauth2.googleapis.com/token"),
        "client_id": token_data.get("client_id"),
        "client_secret": token_data.get("client_secret"),
        "scopes": token_data.get("scopes"),
    }
    oauth_credentials = Credentials(**credential_kwargs)
    return build("gmail", "v1", credentials=oauth_credentials, cache_discovery=False)
 class GmailConnector:
    """SourceConnector implementation backed by the Gmail REST API.

    Every API call is made with the synchronous Google client inside
    ``asyncio.to_thread``; attributes of ``scout`` are only written from the
    calling coroutine, never from the worker thread.
    """
    source_type = "gmail"

    # ── shared helpers ────────────────────────────────────────────────────
    @staticmethod
    def _headers(msg: dict) -> dict[str, str]:
        """Flatten a message's payload headers into a name -> value dict (last wins)."""
        return {
            h["name"]: h["value"]
            for h in msg.get("payload", {}).get("headers", [])
        }

    @staticmethod
    def _header(headers: dict[str, str], name: str) -> str | None:
        """Look up a header, exact name first, then case-insensitively.

        Header field names are case-insensitive, and the API reports them as
        the sender wrote them, so ``subject`` must match ``Subject``.
        """
        if name in headers:
            return headers[name]
        wanted = name.lower()
        for key, value in headers.items():
            if key.lower() == wanted:
                return value
        return None

    @staticmethod
    def _received_at(msg: dict) -> datetime | None:
        """Convert the message's ``internalDate`` (epoch milliseconds) to aware UTC."""
        raw = msg.get("internalDate")
        if raw is None:
            return None
        try:
            return datetime.fromtimestamp(int(raw) / 1000, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            # Unparseable timestamp: treat as unknown rather than failing the fetch.
            return None

    @classmethod
    def _metadata(cls, msg: dict, headers: dict[str, str]) -> ItemMetadata:
        """Build ItemMetadata from a message resource and its flattened headers."""
        return ItemMetadata(
            subject=cls._header(headers, "Subject"),
            sender=cls._header(headers, "From"),
            snippet=msg.get("snippet"),
            received_at=cls._received_at(msg),
        )

    # ── list_new ──────────────────────────────────────────────────────────
    async def list_new(self, scout) -> list[ItemRef]:
        """Return new message refs since scout.gmail_history_id.

        On first run (gmail_history_id is None/empty), records the current
        historyId without backfilling — avoids flooding the user with old mail.
        All result pages are followed, and each message id is returned once
        even if it appears in several history records.

        Updates scout.gmail_history_id in-place (caller must persist to DB).
        """
        def _sync() -> tuple[list[ItemRef], str | None]:
            service = _get_gmail_service(scout)
            history_id = scout.gmail_history_id
            if not history_id:
                # First run: capture baseline history id without backfilling.
                profile = service.users().getProfile(userId="me").execute()
                return [], profile["historyId"]

            refs: list[ItemRef] = []
            seen: set[str] = set()
            new_history_id = history_id
            page_token = None
            # NOTE(review): an expired startHistoryId makes the API answer 404;
            # that error still propagates to the caller, as it did before.
            while True:
                params = {
                    "userId": "me",
                    "startHistoryId": history_id,
                    "historyTypes": ["messageAdded"],
                }
                if page_token:
                    params["pageToken"] = page_token
                resp = service.users().history().list(**params).execute()
                for entry in resp.get("history", []):
                    for added in entry.get("messagesAdded", []):
                        msg_id = added["message"]["id"]
                        if msg_id not in seen:
                            seen.add(msg_id)
                            refs.append(ItemRef(source_msg_ref=msg_id))
                new_history_id = resp.get("historyId", new_history_id)
                # Without following nextPageToken, messages beyond the first
                # page would be dropped while the cursor still advanced.
                page_token = resp.get("nextPageToken")
                if not page_token:
                    break
            return refs, new_history_id

        refs, new_history_id = await asyncio.to_thread(_sync)
        if new_history_id and new_history_id != scout.gmail_history_id:
            scout.gmail_history_id = new_history_id
        return refs

    # ── fetch_metadata ────────────────────────────────────────────────────
    async def fetch_metadata(self, scout, ref: ItemRef) -> ItemMetadata:
        """Fetch subject, sender, snippet only — uses Gmail metadata format (no body).

        ``received_at`` is taken from the message's ``internalDate`` when present.
        """
        def _sync() -> ItemMetadata:
            service = _get_gmail_service(scout)
            msg = (
                service.users()
                .messages()
                .get(
                    userId="me",
                    id=ref.source_msg_ref,
                    format="metadata",
                    metadataHeaders=["Subject", "From", "Date"],
                )
                .execute()
            )
            return self._metadata(msg, self._headers(msg))

        return await asyncio.to_thread(_sync)

    # ── fetch_content ─────────────────────────────────────────────────────
    async def fetch_content(self, scout, ref: ItemRef) -> ItemContent:
        """Fetch full body text for a single message — transient, must not be persisted."""
        def _sync() -> ItemContent:
            service = _get_gmail_service(scout)
            msg = service.users().messages().get(
                userId="me", id=ref.source_msg_ref, format="full",
            ).execute()
            headers = self._headers(msg)
            return ItemContent(
                metadata=self._metadata(msg, headers),
                body_text=_extract_plain_text_body(msg.get("payload", {})),
                raw_headers=headers,
            )

        return await asyncio.to_thread(_sync)

    # ── archive ───────────────────────────────────────────────────────────
    async def archive(self, scout, ref: ItemRef) -> None:
        """Move the message to Gmail Trash (recoverable for 30 days)."""
        def _sync() -> None:
            service = _get_gmail_service(scout)
            service.users().messages().trash(
                userId="me", id=ref.source_msg_ref
            ).execute()

        await asyncio.to_thread(_sync)

    # ── watch management ──────────────────────────────────────────────────
    async def setup_watch(self, scout) -> None:
        """Register a Gmail Pub/Sub push watch for the INBOX label.

        Requires ``settings.GMAIL_PUBSUB_TOPIC`` to be set to the full topic
        resource name (e.g. ``projects/my-project/topics/gmail-push``).
        Logs a warning and returns without error if the topic is not configured.

        Updates ``scout.gmail_watch_expires_at`` from the response.  The
        response's ``historyId`` is adopted only when the scout has no history
        cursor yet: it is the mailbox's *current* id, so overwriting an
        existing cursor (e.g. on renewal) would skip any mail that arrived
        since the last ``list_new``.  The caller must persist the scout.
        """
        topic = settings.GMAIL_PUBSUB_TOPIC
        if not topic:
            logger.warning(
                "setup_watch: GMAIL_PUBSUB_TOPIC is not configured — skipping watch setup"
            )
            return

        def _sync() -> dict:
            service = _get_gmail_service(scout)
            request_body = {
                "labelIds": ["INBOX"],
                "topicName": topic,
            }
            return service.users().watch(userId="me", body=request_body).execute()

        # The worker thread only performs the HTTP call; the scout is mutated
        # here, on the caller's side, consistent with list_new.
        resp = await asyncio.to_thread(_sync)
        history_id = resp.get("historyId")
        if history_id and not scout.gmail_history_id:
            scout.gmail_history_id = history_id
        expiration_ms = resp.get("expiration")
        if expiration_ms:
            scout.gmail_watch_expires_at = datetime.fromtimestamp(
                int(expiration_ms) / 1000, tz=timezone.utc
            )

    async def renew_watch(self, scout) -> None:
        """Renew an existing Gmail Pub/Sub watch (same as setup_watch)."""
        await self.setup_watch(scout)
--- a/app/scouts/connectors/registry.py
+++ b/app/scouts/connectors/registry.py
@@ -0,0 +1,32 @@
 """Connector registry — single source of truth for source_type -> connector."""
 from __future__ import annotations
 from typing import Any
 _CONNECTORS: dict[str, Any] = {}
 def register_connector(connector: Any) -> None:
    """Add ``connector`` to the registry, keyed by its ``source_type``.

    Registering a second connector with an already-used ``source_type``
    overwrites the earlier one. That is handy for tests and hot-reload; in
    production every connector is expected to be registered once at startup.

    Raises:
        ValueError: if ``connector`` has no ``source_type`` attribute or the
            attribute is falsy (e.g. ``None`` or an empty string).
    """
    declared_type = getattr(connector, "source_type", None)
    if declared_type:
        _CONNECTORS[connector.source_type] = connector
        return
    raise ValueError("Connector must declare a non-empty source_type")
 def get_connector(source_type: str) -> Any:
    """Look up the connector registered under ``source_type``.

    Raises:
        KeyError: if nothing is registered for ``source_type``. The message
            names the missing type and the original lookup error is chained
            as the cause.
    """
    try:
        connector = _CONNECTORS[source_type]
    except KeyError as lookup_error:
        message = f"No connector registered for source_type {source_type!r}"
        raise KeyError(message) from lookup_error
    return connector
 def _reset_for_tests() -> None:
    """Clear the registry — for use in pytest fixtures only.

    Empties the module-level ``_CONNECTORS`` dict in place, so any code
    holding a reference to that dict observes the reset as well.
    """
    _CONNECTORS.clear()
--- a/app/scouts/engine.py
+++ b/app/scouts/engine.py
@@ -0,0 +1,270 @@
 """ScoutEngine — orchestrates triage, queueing, and delivery for cloud scouts.
 Triage flow per scout:
  1. Resolve scout config from the DB.
  2. Skip if device hasn't connected within ``device_inactivity_pause_days`` (note: ``trigger_scout`` does not currently enforce this).
  3. Ask the connector to ``list_new`` — fresh items since last poll.
  4. For each item:
     - skip if already in the queue (idempotent on (scout_id, source_msg_ref))
     - fetch the full content via the connector (transient, never persisted)
     - run the triage LLM call → relevant | spam
     - spam + auto_trash_spam → connector.archive
     - relevant → INSERT scout_triage_queue row
  5. Update scout.last_run_at.
 Delivery flow on Electron WS reconnect:
  - drain ``status='queued'`` rows for the user
  - fetch metadata-only for each (subject + snippet)
  - send a ``scout_proposal`` frame
  - flip status to ``delivered`` once the frame is sent (``ack_proposal`` later flips it to ``acked``)
 """
 from __future__ import annotations
 import logging
 import uuid
 from datetime import datetime, timedelta, timezone
 from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
 from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
 from app.core.llm import get_llm
 from app.db import async_session
 from app.models import CloudScoutConfig, ScoutTriageQueue
 from app.scouts.connectors.base import ItemContent, ItemRef, TriageVerdict
 from app.scouts.connectors.registry import get_connector
 logger = logging.getLogger(__name__)
 QUEUE_TTL_DAYS = 30
 class ScoutEngine:
    def __init__(self, session_factory=None) -> None:
        self._session_factory = session_factory or async_session
    async def trigger_scout(self, scout_id: uuid.UUID) -> None:
        """Run one triage pass for the scout identified by ``scout_id``.

        Loads the scout, asks its connector for new item refs, runs each ref
        through :meth:`_process_item`, then stamps ``scout.last_run_at`` and
        commits.

        Returns early, without stamping ``last_run_at``, when:
          - no scout exists for ``scout_id`` (logged as a warning);
          - the scout is disabled;
          - no connector is registered for ``scout.provider`` (logged as a
            warning, mirroring ``deliver_pending``);
          - the connector's ``list_new`` raises (logged with traceback).
        """
        async with self._session_factory() as session:
            scout = await session.get(CloudScoutConfig, str(scout_id))
            if scout is None:
                logger.warning("trigger_scout: no such scout id=%s", scout_id)
                return
            if not scout.enabled:
                return
            # NOTE: no device-inactivity pause is enforced in this method. Per
            # the original design note, the device-online signal lives in the
            # DeviceConnectionManager and is consulted at delivery time.
            try:
                connector = get_connector(scout.provider)
            except KeyError:
                # A scout whose provider has no registered connector cannot be
                # triaged; skip the run instead of crashing the caller.
                logger.warning("trigger_scout: no connector for %s", scout.provider)
                return
            try:
                refs = await connector.list_new(scout)
            except Exception:
                logger.exception("scout %s: list_new failed", scout.id)
                return
            for ref in refs:
                await self._process_item(session, scout, connector, ref)
            scout.last_run_at = datetime.now(tz=timezone.utc)
            await session.commit()
    async def _process_item(
        self,
        session,
        scout: CloudScoutConfig,
        connector,
        ref: ItemRef,
    ) -> None:
        """Triage one item and enqueue it unless it is classified as spam.

        Steps:
          1. Skip if a queue row already exists for
             ``(scout.id, ref.source_msg_ref)``.
          2. Fetch the item's content through the connector.
          3. Classify it with :meth:`_triage_llm`.
          4. ``spam``: archive via the connector when ``scout.auto_trash_spam``
             is set, then stop. Any other verdict: insert a ``queued``
             ``ScoutTriageQueue`` row expiring after ``QUEUE_TTL_DAYS`` days.

        Failures in fetch, triage or archive are logged and the item is
        skipped; they never propagate. The insert is flushed but not committed
        — committing is left to the caller.
        """
        # Idempotency check
        existing = await session.execute(
            select(ScoutTriageQueue.id).where(
                ScoutTriageQueue.scout_id == scout.id,
                ScoutTriageQueue.source_msg_ref == ref.source_msg_ref,
            )
        )
        if existing.first() is not None:
            return
        try:
            content = await connector.fetch_content(scout, ref)
        except Exception:
            logger.exception("scout %s: fetch_content failed for %s", scout.id, ref.source_msg_ref)
            return
        try:
            verdict = await self._triage_llm(scout, content)
        except Exception:
            logger.exception("scout %s: triage_llm failed for %s", scout.id, ref.source_msg_ref)
            return
        if verdict.verdict == "spam":
            if scout.auto_trash_spam:
                try:
                    await connector.archive(scout, ref)
                except Exception:
                    logger.exception("scout %s: archive failed for %s", scout.id, ref.source_msg_ref)
            return
        now = datetime.now(tz=timezone.utc)
        row = ScoutTriageQueue(
            id=str(uuid.uuid4()),
            user_id=scout.user_id,
            scout_id=scout.id,
            source_type=connector.source_type,
            source_msg_ref=ref.source_msg_ref,
            triage_verdict=verdict.verdict,
            triage_reason=verdict.reason,
            status="queued",
            triaged_at=now,
            expires_at=now + timedelta(days=QUEUE_TTL_DAYS),
        )
        try:
            # Use a savepoint so an IntegrityError on race doesn't poison the
            # outer session — works on both PostgreSQL (SAVEPOINT) and SQLite.
            # The row must be added INSIDE the savepoint: begin_nested()
            # flushes all pending state before emitting SAVEPOINT, so a row
            # added beforehand would be inserted in the outer transaction and
            # a constraint violation would invalidate it.
            async with session.begin_nested():
                session.add(row)
                await session.flush()
        except IntegrityError:
            # Race: another worker inserted between our SELECT and INSERT.
            # The unique constraint did its job; safe to ignore.
            logger.debug(
                "scout %s: idempotent skip for %s (race on unique constraint)",
                scout.id,
                ref.source_msg_ref,
            )
    async def deliver_pending(self, user_id: uuid.UUID, ws) -> None:
        """Drain status='queued' rows for user, send scout_proposal WS frames, flip to 'delivered'.

        For each queued row the connector's ``fetch_metadata`` supplies the
        subject and snippet placed in the frame. Rows are skipped (and stay
        ``queued``) when their connector is not registered, their scout no
        longer exists, or the metadata fetch fails.

        If ``ws.send_json`` raises, the rows already sent are committed as
        ``delivered`` before the exception is re-raised, so a dropped socket
        does not cause those proposals to be delivered again on reconnect.
        """
        from app.scouts.connectors.base import ItemRef  # noqa: PLC0415
        async with self._session_factory() as session:
            rows = (await session.execute(
                select(ScoutTriageQueue).where(
                    ScoutTriageQueue.user_id == str(user_id),
                    ScoutTriageQueue.status == "queued",
                )
            )).scalars().all()
            for row in rows:
                try:
                    connector = get_connector(row.source_type)
                except KeyError:
                    logger.warning("deliver_pending: no connector for %s", row.source_type)
                    continue
                scout = await session.get(CloudScoutConfig, row.scout_id)
                if scout is None:
                    continue
                try:
                    meta = await connector.fetch_metadata(scout, ItemRef(source_msg_ref=row.source_msg_ref))
                except Exception:
                    logger.exception("deliver_pending: fetch_metadata failed")
                    continue
                payload = {
                    "type": "scout_proposal",
                    "proposal": {
                        "id": row.id,
                        "scout_id": row.scout_id,
                        "source_type": row.source_type,
                        "source_msg_ref": row.source_msg_ref,
                        "raw_subject": meta.subject,
                        "raw_snippet": meta.snippet,
                        "category": "unprocessed",
                        "payload": None,
                    },
                }
                try:
                    await ws.send_json(payload)
                except Exception:
                    # Persist the status flips of frames that were already
                    # sent; otherwise leaving the session block would roll
                    # them back and they would be re-sent as duplicates.
                    await session.commit()
                    raise
                row.status = "delivered"
                row.delivered_at = datetime.now(tz=timezone.utc)
            await session.commit()
    async def ack_proposal(self, proposal_id: str) -> None:
        """Flip a delivered proposal to acked. Idempotent — no-op if already acked.

        Unknown ``proposal_id`` values are ignored. A row that is already
        ``acked`` is left untouched, so its original ``acked_at`` timestamp is
        preserved when the client repeats the ack.
        """
        async with self._session_factory() as session:
            row = await session.get(ScoutTriageQueue, proposal_id)
            if row is None or row.status == "acked":
                return
            row.status = "acked"
            row.acked_at = datetime.now(tz=timezone.utc)
            await session.commit()
    async def _triage_llm(self, scout: CloudScoutConfig, content: ItemContent) -> TriageVerdict:
        """Classify one item as relevant or spam via the ``scout-triage-system`` prompt.

        The system prompt is rendered from the Langfuse prompt when one is
        available, otherwise from the built-in fallback template. If compiling
        the Langfuse prompt fails, the ``{{name}}`` placeholders in the
        template text are substituted manually. The body is truncated to 2000
        characters before rendering.

        The call uses gpt-4o-mini at temperature 0 in JSON mode and is wrapped
        in a Langfuse generation observation when Langfuse is configured. The
        JSON response is unpacked into a ``TriageVerdict``.
        """
        import json  # noqa: PLC0415
        from langchain_core.messages import HumanMessage, SystemMessage  # noqa: PLC0415

        model_name = "gpt-4o-mini"
        fallback_prompt = (
            "You are a triage classifier for an executive-assistant scout that watches a "
            "{source_type} feed.\n"
            'The scout\'s purpose is: "{scout_purpose}".\n\n'
            "Given one item, decide whether it is RELEVANT (worth surfacing to the user as a "
            "potential task / event / note / project) or SPAM (advertising, mass marketing, "
            "phishing, bulk notifications with no actionable content).\n\n"
            "Item:\n"
            "  - Subject: {item_subject}\n"
            "  - From:    {item_sender}\n"
            "  - Body (truncated): {item_body_truncated_2k}\n\n"
            'Return JSON only, matching this schema:\n'
            '  {{"verdict": "relevant" | "spam", "reason": <short string>, "confidence": <0..1>}}\n\n'
            "Be conservative on \"spam\" — if a message could plausibly be a personal/work "
            "email, mark it relevant."
        )
        template, prompt_obj = get_prompt_or_fallback("scout-triage-system", fallback_prompt)

        truncated_body = (content.body_text or "")[:2000]
        variables = {
            "source_type": scout.provider,
            "scout_purpose": scout.prompt_template or "",
            "item_subject": content.metadata.subject or "",
            "item_sender": content.metadata.sender or "",
            "item_body_truncated_2k": truncated_body,
        }

        if prompt_obj is None:
            # Fallback template uses str.format-style single-brace placeholders.
            system_text = template.format(**variables)
        else:
            try:
                compiled = prompt_obj.compile(**variables)
                if isinstance(compiled, list):
                    # Chat-style prompt: flatten the message contents.
                    compiled = "\n".join(
                        part.get("content", "") for part in compiled if isinstance(part, dict)
                    )
                system_text = compiled
            except Exception as exc:
                logger.warning("scout triage: compile failed: %s", exc)
                # Manual substitution of the double-brace placeholders, in the
                # same order the variables are declared above.
                system_text = template
                for placeholder, value in variables.items():
                    system_text = system_text.replace("{{" + placeholder + "}}", value)

        base_llm = get_llm(model=model_name, temperature=0)
        llm_json = base_llm.bind(response_format={"type": "json_object"})  # type: ignore[attr-defined]
        messages = [
            SystemMessage(content=system_text),
            HumanMessage(content="Classify this item."),
        ]

        langfuse = get_langfuse()
        if not langfuse:
            response = await llm_json.ainvoke(messages)
        else:
            with langfuse.start_as_current_observation(
                as_type="generation",
                name="scout-triage",
                model=model_name,
                prompt=prompt_obj,
                input=messages,
            ) as generation:
                response = await llm_json.ainvoke(messages)
                generation.update(output=response.content, usage=extract_usage(response))

        parsed = json.loads(response.content)
        return TriageVerdict(**parsed)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,34 +1,27 @@
 # ── Adiuva Microservices ─────────────────────────────────────────────
 # docker compose up --build
 # docker compose up --build auth ws-gateway chat   # subset
 services:
-
+  app:
-  # ═══════════════════════════════════════════════════════════════════
+    build: .
  #  Infrastructure
  # ═══════════════════════════════════════════════════════════════════
  traefik:
    image: traefik:v3.1
    ports:
-      - "80:80"
+      - "8080:8000"
-      - "443:443"
+    env_file:
-      - "8080:8080"   # dashboard (dev only)
+      - path: .env
        required: false
    environment:
-      CF_DNS_API_TOKEN: ${CF_DNS_API_TOKEN:-}
+      DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/adiuvai
      GITHUB_COPILOT_TOKEN_DIR: /root/.config/litellm/github_copilot
    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - copilot_tokens:/root/.config/litellm/github_copilot
-      - ./traefik/traefik.yml:/etc/traefik/traefik.yml:ro
+    depends_on:
-      - ./traefik/dynamic:/etc/traefik/dynamic:ro
+      db:
-      - traefik_acme:/etc/traefik/acme
+        condition: service_healthy
    restart: unless-stopped
  db:
    image: pgvector/pgvector:pg16
    environment:
-      POSTGRES_USER: ${POSTGRES_USER:-postgres}
+      POSTGRES_USER: postgres
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
+      POSTGRES_PASSWORD: postgres
-      POSTGRES_DB: ${POSTGRES_DB:-adiuva}
+      POSTGRES_DB: adiuvai
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
@@ -38,161 +31,11 @@ services:
      retries: 5
    restart: unless-stopped
-  redis:
+  # Optional Redis for future rate-limit or caching needs
-    image: redis:7-alpine
+  # redis:
-    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
+  #   image: redis:7-alpine
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5
    restart: unless-stopped
  # ── Optional infrastructure (uncomment as needed) ────────────────
  # minio:
  #   image: minio/minio:latest
  #   command: server /data --console-address ":9001"
  #   ports:
  #     - "9000:9000"
  #     - "9001:9001"
  #   environment:
  #     MINIO_ROOT_USER: minioadmin
  #     MINIO_ROOT_PASSWORD: minioadmin
  #   volumes:
  #     - minio_data:/data
  #   healthcheck:
  #     test: ["CMD", "mc", "ready", "local"]
  #     interval: 5s
  #     timeout: 5s
  #     retries: 5
  #   restart: unless-stopped
  # qdrant:
  #   image: qdrant/qdrant:latest
  #   ports:
  #     - "6333:6333"
  #     - "6334:6334"
  #   volumes:
  #     - qdrant_data:/qdrant/storage
  #   restart: unless-stopped
  # ═══════════════════════════════════════════════════════════════════
  #  Migrations (run once, then exit)
  # ═══════════════════════════════════════════════════════════════════
  migrate:
    build:
      context: .
      dockerfile: Dockerfile
    command: ["python", "-m", "alembic", "upgrade", "head"]
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
    depends_on:
      db:
        condition: service_healthy
    restart: "no"
  # ═══════════════════════════════════════════════════════════════════
  #  Application Services
  # ═══════════════════════════════════════════════════════════════════
  auth:
    build:
      context: .
      dockerfile: services/auth/Dockerfile
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
      REDIS_URL: redis://redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      migrate:
        condition: service_completed_successfully
    restart: unless-stopped
  ws-gateway:
    build:
      context: .
      dockerfile: services/ws-gateway/Dockerfile
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
      REDIS_URL: redis://redis:6379/0
    depends_on:
      redis:
        condition: service_healthy
      auth:
        condition: service_started
    restart: unless-stopped
  chat:
    build:
      context: .
      dockerfile: services/chat/Dockerfile
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
      REDIS_URL: redis://redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
      migrate:
        condition: service_completed_successfully
    restart: unless-stopped
  batch-agent:
    build:
      context: .
      dockerfile: services/batch-agent/Dockerfile
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
      REDIS_URL: redis://redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
      migrate:
        condition: service_completed_successfully
    restart: unless-stopped
  billing:
    build:
      context: .
      dockerfile: services/billing/Dockerfile
    env_file:
      - path: .env
        required: false
    environment:
      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-adiuva}
    depends_on:
      db:
        condition: service_healthy
      migrate:
        condition: service_completed_successfully
    restart: unless-stopped
 volumes:
  postgres_data:
-  redis_data:
+  copilot_tokens:
  traefik_acme:
  # minio_data:
  # qdrant_data:
--- a/docs/MICROSERVICES_ARCHITECTURE.md
+++ b/docs/MICROSERVICES_ARCHITECTURE.md
@@ -1,941 +0,0 @@
 # Adiuva — Architettura Microservizi (MVP)
 ## Panoramica
 Il monolite viene suddiviso in **4 servizi MVP** + un **API Gateway (Traefik)**, orchestrati con Docker Compose su un singolo VPS raggiungibile via Cloudflare.
 > **Fuori dall'MVP**: Storage Service (S3/backup CRUD) e Plugin Service (marketplace). Verranno aggiunti come servizi indipendenti in una fase successiva.
 ```
                          ┌──────────────┐
                          │  Cloudflare  │
                          │  (DNS + CDN) │
                          └──────┬───────┘
                                 │ HTTPS / WSS
                          ┌──────▼───────┐
                          │   Traefik    │
                          │ API Gateway  │
                          │  (routing,   │
                          │   TLS, rate  │
                          │   limiting)  │
                          └──────┬───────┘
                                 │
          ┌──────────┬───────────┼───────────┐
          │          │           │           │
    ┌─────▼────┐ ┌───▼───┐ ┌────▼────┐ ┌────▼───┐
    │  Auth    │ │  Chat │ │  Agent  │ │Billing │
    │ Service  │ │Service│ │ Service │ │Service │
    └─────┬────┘ └───┬───┘ └────┬────┘ └────┬───┘
          │          │          │           │
    ┌─────▼──────────▼──────────▼───────────▼────┐
    │              Infrastruttura                 │
    │  PostgreSQL  │  Redis  │  Qdrant            │
    └─────────────────────────────────────────────┘
 ```
 ---
 ## 1. Suddivisione dei Servizi
 ### 1.1 Auth Service (`auth-service`)
 **Responsabilità**: Registrazione, login, refresh token, profilo utente, encryption key.
 | Endpoint originale | Metodo |
 |---|---|
 | `/api/v1/auth/register` | POST |
 | `/api/v1/auth/login` | POST |
 | `/api/v1/auth/refresh` | POST |
 | `/api/v1/auth/me` | GET / PUT |
 **Database**: Tabelle `users`, `refresh_tokens` (PostgreSQL condiviso, schema `auth`).
 **Modifica chiave — JWT con RS256**:
 Il monolite usa un `SECRET_KEY` simmetrico (HS256). Con i microservizi, passare a **RS256** (asimmetrico):
 - L'Auth Service firma i JWT con la **chiave privata**.
 - Tutti gli altri servizi verificano i JWT con la **chiave pubblica** senza mai contattare l'Auth Service.
 - La chiave pubblica viene esposta via `GET /api/v1/auth/.well-known/jwks.json` oppure montata come volume condiviso.
 ```python
 # auth-service/app/auth/jwt.py
 from cryptography.hazmat.primitives.asymmetric import rsa
 from jose import jwt
 PRIVATE_KEY = ...  # Da env/secret
 PUBLIC_KEY = ...   # Derivata o da env
 def create_access_token(user_id: str, tier: str) -> str:
    return jwt.encode(
        {"sub": user_id, "tier": tier, "exp": ...},
        PRIVATE_KEY,
        algorithm="RS256",
    )
 ```
 ```python
 # shared/auth.py  (usato da tutti gli altri servizi)
 from jose import jwt
 PUBLIC_KEY = ...  # Volume montato o fetched da JWKS endpoint
 def verify_token(token: str) -> dict:
    return jwt.decode(token, PUBLIC_KEY, algorithms=["RS256"])
 ```
 **Scaling**: 2 repliche sufficienti, stateless. Rate-limit dedicato su `/login` e `/register`.
 ---
 ### 1.2 Chat Service (`chat-service`) ⭐ Real-time
 **Responsabilità**: WebSocket device connection, home chat, floating chat, memory middleware, streaming LLM responses verso il client.
 Questo servizio gestisce la **connessione persistente** con l'app Electron e le interazioni **real-time** dell'utente (chat home, floating chat). È il proprietario della WebSocket.
 | Endpoint | Tipo |
 |---|---|
 | `/api/v1/ws/device` | WebSocket (connessione persistente) |
 | `/api/v1/chat` | POST (REST fallback) |
 **Moduli inclusi**: `deep_agent`, `memory_middleware`, `ws_context`, `device_manager` (Redis-backed), `output_formatter`, `llm`, tutti gli agent tools (`task_agent`, `project_agent`, `note_agent`, `timeline_agent`).
 **Perché separato dall'Agent Service**: Il Chat Service tiene la WebSocket aperta e risponde in tempo reale (streaming). Scalare aggiungendo repliche è semplice con sticky sessions + Redis pub/sub per il cross-instance routing dei tool_call.
 **Scaling**: 2–N repliche. Sticky cookies per le WS + Redis per cross-instance.
 ---
 ### 1.3 Agent Service (`agent-service`) ⭐ Batch
 **Responsabilità**: Batch agent processing (directory scanning, file classification, entity extraction), agent setup journeys, agent configuration CRUD.
 Questo servizio gestisce i processi **long-running** e **CPU-intensive**: scansione filesystem, classificazione file con LLM, estrazione entità in batch. Non possiede la WebSocket — comunica con il device dell'utente tramite **Redis pub/sub** passando per il Chat Service.
 | Endpoint | Tipo |
 |---|---|
 | `/api/v1/agents/catalog` | GET |
 | `/api/v1/agents/can-create` | POST |
 | `/api/v1/agents/trigger` | POST |
 | `/api/v1/agents/journey/start` | POST (o WS relay) |
 | `/api/v1/agents/journey/message` | POST (o WS relay) |
 **Moduli inclusi**: `agent_runner`, `agent_registry`, `filesystem_agent`, `llm`.
 **Flusso tool-call cross-service** (l'Agent Service non ha la WS):
 ```
 ┌──────────────┐            ┌──────────────┐            ┌──────────┐
 │ Agent Service│            │    Redis     │            │  Chat    │
 │ (batch run)  │            │              │            │ Service  │
 │              │            │              │            │ (ha WS)  │
 │ 1. Needs to  │  PUBLISH   │              │ SUBSCRIBE  │          │
 │    read file ├───────────►│tool_call:u123├───────────►│ 2. Invia │
 │    from      │            │              │            │    al    │
 │    device    │            │              │            │    device│
 │              │            │              │            │    via WS│
 │              │  SUBSCRIBE │              │  PUBLISH   │          │
 │ 4. Riceve   ◄────────────┤tool_result:id│◄───────────┤ 3. Device│
 │    risultato │            │              │            │    reply │
 └──────────────┘            └──────────────┘            └──────────┘
 ```
 **Scaling**: 1–N repliche. Completamente stateless, scala indipendentemente dalla chat. Ogni replica processa batch job diversi. Può essere scalato a 0 se non ci sono agent attivi (risparmio risorse).
 **Vantaggio dello split**: Se 50 utenti triggerano agenti batch contemporaneamente, il Chat Service non ne risente — le risposte real-time rimangono veloci.
 ---
 ### 1.4 Billing Service (`billing-service`)
 **Responsabilità**: Stripe checkout, webhook, subscription management.
 | Endpoint originale | Metodo |
 |---|---|
 | `/api/v1/billing/checkout` | POST |
 | `/api/v1/billing/webhook` | POST |
 | `/api/v1/billing/subscription` | GET / DELETE |
 **Database**: Tabelle `subscriptions` (schema `billing`).
 **Comunicazione inter-servizio**: Quando Stripe invia un webhook e il tier cambia, il Billing Service pubblica un evento su **Redis pub/sub** channel `tier_changed:{user_id}`. L'Auth Service aggiorna il campo `tier` nella tabella users. Al prossimo token refresh il JWT conterrà il tier aggiornato.
 **Scaling**: 1 replica sufficiente. Basso traffico.
 ---
 ### 1.5 Servizi esclusi dall'MVP
 I seguenti servizi verranno aggiunti post-MVP come servizi indipendenti:
 | Servizio | Responsabilità | Note |
 |---|---|---|
 | **Storage Service** | S3 blobs CRUD, vector ops, backup | Le funzionalità vector/embed possono restare nel Chat Service per il MVP |
 | **Plugin Service** | Marketplace, install, revenue split | Feature non critica per il lancio |
 ---
 ## 2. Tier Check — Dove e Come
 Il tier dell'utente (free/pro/power/team) determina rate-limiting, quote e accesso a funzionalità. Con i microservizi, **ogni servizio controlla il tier autonomamente** senza chiamare l'Auth Service.
 ### Strategia: Tier nel JWT
 L'Auth Service include il `tier` come claim nel JWT al momento del login/refresh:
 ```json
 {
  "sub": "user_123",
  "tier": "pro",
  "exp": 1742515200,
  "iat": 1742511600
 }
 ```
 Ogni servizio:
 1. Decodifica il JWT con la chiave pubblica (già lo fa per l'auth)
 2. Legge `payload["tier"]` — **zero chiamate extra**
 3. Applica le sue regole di enforcement localmente
 ```python
 # shared/auth.py — dependency FastAPI condivisa
 from fastapi import Depends, HTTPException, Request
 from jose import jwt
 PUBLIC_KEY = ...
 class CurrentUser:
    def __init__(self, user_id: str, tier: str):
        self.user_id = user_id
        self.tier = tier
 async def get_current_user(request: Request) -> CurrentUser:
    token = request.headers.get("Authorization", "").removeprefix("Bearer ")
    payload = jwt.decode(token, PUBLIC_KEY, algorithms=["RS256"])
    return CurrentUser(user_id=payload["sub"], tier=payload["tier"])
 def require_tier(*allowed_tiers: str):
    """Dependency che blocca se il tier non è tra quelli ammessi."""
    async def check(user: CurrentUser = Depends(get_current_user)):
        if user.tier not in allowed_tiers:
            raise HTTPException(403, "Tier insufficient")
        return user
    return check
 ```
 ### Cosa succede quando il tier cambia (upgrade/downgrade)?
 ```
 ┌──────────┐  Stripe webhook   ┌──────────┐  tier_changed   ┌──────────┐
 │  Stripe  │ ─────────────────►│ Billing  │ ───────────────►│   Auth   │
 │          │                    │ Service  │  (Redis pub/sub) │ Service  │
 └──────────┘                    └──────────┘                  └────┬─────┘
                                                                   │
                                                          UPDATE users
                                                          SET tier = 'power'
                                                                   │
                                                    Al prossimo /refresh
                                                    il JWT conterrà tier='power'
 ```
 **Latenza del cambio**: Il tier si propaga al prossimo token refresh (tipicamente 15–30 min, o il client può forzare un refresh immediato dopo il checkout). Per il billing webhook, il downgrade può essere forzato invalidando il refresh token su Redis → il client è obbligato a ri-autenticarsi.
 ### Dove si applica in ciascun servizio
 | Servizio | Enforcement |
 |---|---|
 | **Auth Service** | Nessuno (è lui che scrive il tier) |
 | **Chat Service** | Rate-limit per tier (req/min), quota messaggi |
 | **Agent Service** | Max agent configs, max runs/day, max concurrent batches |
 | **Billing Service** | Nessuno (gestisce i tier, non li consuma) |
 ### Rate-limit distribuito via Redis
 Poiché ogni servizio ha le sue repliche, il rate-limiting deve essere **condiviso** via Redis:
 ```python
 # shared/middleware/rate_limit.py
 import redis.asyncio as aioredis
 class DistributedRateLimiter:
    """Fixed-window, per-user request counter shared across replicas via Redis."""
    def __init__(self, redis: aioredis.Redis):
        self._redis = redis
    async def check(self, user_id: str, tier: str, service: str) -> bool:
        """Count one request and report whether the caller is within its limit.

        The per-minute limit depends on ``tier`` (unknown tiers get the
        ``free`` limit of 20). Counters are keyed by
        ``rate:{service}:{user_id}``.

        Returns:
            ``True`` while the count in the current 60-second window is at or
            below the tier limit, ``False`` once it is exceeded.
        """
        limits = {"free": 20, "pro": 60, "power": 120, "team": 200}
        max_req = limits.get(tier, 20)
        key = f"rate:{service}:{user_id}"
        pipe = self._redis.pipeline()
        pipe.incr(key)
        # NX: set the TTL only when the key has none, i.e. on the first request
        # of a window. Re-arming the TTL on every request would keep the key
        # alive forever under steady traffic and the counter would never
        # reset. (EXPIRE ... NX requires Redis 7.0+.)
        pipe.expire(key, 60, nx=True)
        count, _ = await pipe.execute()
        return count <= max_req
 ```
 ---
 ## 3. WebSocket con Scaling Orizzontale — Il Problema Chiave
 `DeviceConnectionManager` è un **singleton in-memory**:
 ```python
 class DeviceConnectionManager:
    def __init__(self):
        self._connections: dict[str, DeviceConnection] = {}  # ← In-memory!
 ```
 Con N istanze del Chat Service, il device si connette a **una sola** istanza. Quando un'altra istanza deve inviare un `tool_call` a quel device (es. un agent trigger da un'API call), non trova la connessione.
 ### La soluzione: Redis Pub/Sub + Registry
 ```
 ┌──────────────────────────────────────────────────────────────┐
 │                     Redis                                    │
 │                                                              │
 │  Hash: ws:connections                                        │
 │    user_123 → instance_A                                     │
 │    user_456 → instance_B                                     │
 │                                                              │
 │  Pub/Sub channels:                                           │
 │    tool_call:{user_id}  → tool call payloads                 │
 │    tool_result:{call_id} → tool result payloads              │
 │    stream:{user_id}     → text_chunk streaming               │
 └──────────────────────────────────────────────────────────────┘
 Instance A (ha WS di user_123)     Instance B (deve chiamare tool su user_123)
 ┌───────────────────────┐          ┌───────────────────────┐
 │  1. Sottoscrive a     │          │  1. Lookup Redis Hash │
 │     tool_call:user_123│          │     → user_123 è su A │
 │                       │          │                       │
 │  2. Riceve tool_call  │◄─────────│  2. PUBLISH           │
 │     da Redis channel  │          │    tool_call:user_123 │
 │                       │          │    {id, action, ...}  │
 │  3. Invia al device   │          │                       │
 │     via WS            │          │  4. SUBSCRIBE         │
 │                       │          │    tool_result:{id}   │
 │  4. Device risponde   │          │                       │
 │     tool_result       │──────────│► 5. Riceve risultato  │
 │                       │          │                       │
 │  5. PUBLISH           │          │                       │
 │    tool_result:{id}   │          │                       │
 └───────────────────────┘          └───────────────────────┘
 ```
 ### Implementazione: `RedisDeviceManager`
 ```python
 # chat-service/app/core/device_manager.py
 import asyncio
 import json
 import os
 import redis.asyncio as aioredis
 from dataclasses import dataclass, field
 from fastapi import WebSocket
 INSTANCE_ID = os.environ.get("INSTANCE_ID", os.urandom(8).hex())
@dataclass
 class LocalConnection:
    """State for one WebSocket connection held by this instance."""
    # WebSocket used to send tool_call frames to the device.
    ws: WebSocket
    # Device identifier supplied when the connection is registered.
    device_id: str
    # In-flight tool calls keyed by call id; RedisDeviceManager.resolve_local sets
    # their result and RedisDeviceManager.unregister cancels the unfinished ones.
    pending_calls: dict[str, asyncio.Future[dict]] = field(default_factory=dict)
 class RedisDeviceManager:
    """Device manager backed by Redis for cross-instance communication.
    Each instance keeps its own WebSocket connections in memory and records in the
    `ws:connections` Redis hash which instance owns each user's socket. Tool calls
    for users connected to another instance travel over the `tool_call:{user_id}`
    and `tool_result:{call_id}` pub/sub channels.
    """
    def __init__(self, redis_url: str = "redis://redis:6379"):
        self._redis = aioredis.from_url(redis_url)
        self._pubsub = self._redis.pubsub()
        self._local: dict[str, LocalConnection] = {}  # local connections only
        self._remote_futures: dict[str, asyncio.Future[dict]] = {}
        # Strong reference to the listener task: the event loop only keeps weak
        # references to tasks, so an unreferenced task can be garbage-collected.
        self._listener_task: asyncio.Task | None = None
    async def start(self):
        """Start the Redis listener for incoming pub/sub messages."""
        self._ensure_listener()
    def _ensure_listener(self):
        """Create the listener task unless one is already running.
        NOTE(review): redis-py's `PubSub.listen()` appears to stop iterating while
        the client has no subscriptions, so a listener started before the first
        subscribe (or left idle after the last unsubscribe) ends; callers re-arm it
        after every subscribe. Confirm against the installed redis-py version.
        """
        if self._listener_task is None or self._listener_task.done():
            self._listener_task = asyncio.create_task(self._listen_tool_calls())
    # ── Registration ──
    async def register(self, user_id: str, device_id: str, ws: WebSocket):
        """Track a new local connection and advertise it in Redis."""
        # Register locally
        self._local[user_id] = LocalConnection(ws=ws, device_id=device_id)
        # Record in Redis which instance owns the connection
        await self._redis.hset("ws:connections", user_id, INSTANCE_ID)
        # Subscribe to tool calls addressed to this user
        await self._pubsub.subscribe(f"tool_call:{user_id}")
        self._ensure_listener()
    async def unregister(self, user_id: str):
        """Drop a local connection, cancel its pending calls and clear Redis state."""
        conn = self._local.pop(user_id, None)
        if conn:
            for fut in conn.pending_calls.values():
                if not fut.done():
                    fut.cancel()
        await self._redis.hdel("ws:connections", user_id)
        await self._pubsub.unsubscribe(f"tool_call:{user_id}")
    # ── Presence ──
    async def is_online(self, user_id: str) -> bool:
        """Return whether any instance has a registered connection for the user."""
        return await self._redis.hexists("ws:connections", user_id)
    # ── Tool-call round-trip (cross-instance) ──
    async def execute_tool_call(
        self, user_id: str, payload: dict, timeout: float = 30.0
    ) -> dict:
        """Send a tool_call to the user's device and wait for its result.
        Works whether the WebSocket is local or owned by another instance.
        `payload` must contain an "id" key, used as the call id. Raises
        asyncio.TimeoutError when no result arrives within `timeout` seconds.
        """
        call_id = payload["id"]
        # get_running_loop() is the supported way to reach the loop from a coroutine.
        fut: asyncio.Future[dict] = asyncio.get_running_loop().create_future()
        # Case 1: local connection → send directly
        conn = self._local.get(user_id)
        if conn is not None:
            conn.pending_calls[call_id] = fut
            try:
                await conn.ws.send_text(json.dumps({"type": "tool_call", **payload}))
                return await asyncio.wait_for(fut, timeout=timeout)
            finally:
                # Drop the entry on timeout or send failure too, so it cannot leak.
                conn.pending_calls.pop(call_id, None)
        # Case 2: remote connection → Redis pub/sub
        self._remote_futures[call_id] = fut
        result_channel = f"tool_result:{call_id}"
        try:
            # Subscribe to the reply channel before publishing the call
            await self._pubsub.subscribe(result_channel)
            self._ensure_listener()
            try:
                await self._redis.publish(
                    f"tool_call:{user_id}",
                    json.dumps(payload),
                )
                return await asyncio.wait_for(fut, timeout=timeout)
            finally:
                await self._pubsub.unsubscribe(result_channel)
        finally:
            self._remote_futures.pop(call_id, None)
    # ── tool_result resolution (from the local WS) ──
    def resolve_local(self, user_id: str, call_id: str, result: dict):
        """Complete the pending local future for `call_id`, if one is still waiting."""
        conn = self._local.get(user_id)
        if conn:
            fut = conn.pending_calls.pop(call_id, None)
            if fut and not fut.done():
                fut.set_result(result)
    async def resolve_and_publish(self, user_id: str, call_id: str, result: dict):
        """Called when the local device sends a tool_result."""
        self.resolve_local(user_id, call_id, result)
        # Also publish on Redis for a remote instance that may be waiting
        await self._redis.publish(
            f"tool_result:{call_id}",
            json.dumps(result),
        )
    # ── Redis listener ──
    async def _listen_tool_calls(self):
        """Dispatch pub/sub messages: forward tool calls, resolve remote results."""
        async for message in self._pubsub.listen():
            if message["type"] != "message":
                continue
            channel = message["channel"]
            if isinstance(channel, bytes):
                channel = channel.decode()
            data = json.loads(message["data"])
            if channel.startswith("tool_call:"):
                # Another instance wants us to forward a tool_call to our device
                user_id = channel.split(":", 1)[1]
                conn = self._local.get(user_id)
                if conn:
                    await conn.ws.send_text(json.dumps({"type": "tool_call", **data}))
            elif channel.startswith("tool_result:"):
                # Reply to a tool_call we sent through Redis
                call_id = channel.split(":", 1)[1]
                fut = self._remote_futures.pop(call_id, None)
                if fut and not fut.done():
                    fut.set_result(data)
    # ── Cross-instance stream ──
    async def publish_stream_chunk(self, user_id: str, chunk: dict):
        """Publish a streaming chunk on Redis (for the REST→WS relay)."""
        await self._redis.publish(f"stream:{user_id}", json.dumps(chunk))
 ```
 ---
 ## 4. Struttura Directory Proposta (MVP)
 ```
 adiuva-api/
 ├── docker-compose.yml          # Orchestrazione completa
 ├── docker-compose.dev.yml      # Override per sviluppo locale
 ├── shared/                     # Codice condiviso (montato come volume)
 │   ├── auth.py                 # JWT verification (chiave pubblica)
 │   ├── schemas.py              # Pydantic schemas condivisi
 │   ├── middleware/
 │   │   ├── rate_limit.py       # DistributedRateLimiter (Redis)
 │   │   └── sanitizer.py
 │   └── models/
 │       └── base.py             # SQLAlchemy base condivisa
 │
 ├── auth-service/
 │   ├── Dockerfile
 │   ├── requirements.txt
 │   └── app/
 │       ├── main.py
 │       ├── config.py
 │       ├── db.py
 │       ├── models.py           # users, refresh_tokens
 │       ├── routes/
 │       │   └── auth.py
 │       └── services/
 │           ├── jwt_service.py  # RS256 signing
 │           └── user_service.py
 │
 ├── chat-service/
 │   ├── Dockerfile
 │   ├── requirements.txt
 │   └── app/
 │       ├── main.py
 │       ├── config.py
 │       ├── db.py
 │       ├── models.py           # memory_*
 │       ├── routes/
 │       │   ├── device_ws.py    # WS connection owner
 │       │   └── chat.py         # REST fallback
 │       ├── core/
 │       │   ├── device_manager.py   # RedisDeviceManager
 │       │   ├── deep_agent.py       # Home + floating chat
 │       │   ├── memory_middleware.py
 │       │   ├── ws_context.py
 │       │   ├── output_formatter.py
 │       │   └── llm.py
 │       └── agents/                 # Tool definitions (used by deep_agent)
 │           ├── task_agent.py
 │           ├── project_agent.py
 │           ├── note_agent.py
 │           └── timeline_agent.py
 │
 ├── agent-service/
 │   ├── Dockerfile
 │   ├── requirements.txt
 │   └── app/
 │       ├── main.py
 │       ├── config.py
 │       ├── db.py
 │       ├── models.py           # agent_run_logs, local/cloud_agent_configs
 │       ├── routes/
 │       │   ├── agents.py       # catalog, can-create, trigger
 │       │   └── agent_setup.py  # journey start/message
 │       ├── core/
 │       │   ├── agent_runner.py     # Batch classify → process
 │       │   ├── agent_registry.py
 │       │   ├── redis_executor.py   # execute_on_client via Redis pub/sub
 │       │   └── llm.py
 │       └── agents/
 │           ├── task_agent.py       # Tool definitions (batch context)
 │           ├── project_agent.py
 │           ├── note_agent.py
 │           ├── timeline_agent.py
 │           └── filesystem_agent.py
 │
 ├── billing-service/
 │   ├── Dockerfile
 │   ├── requirements.txt
 │   └── app/
 │       ├── main.py
 │       ├── config.py
 │       ├── db.py
 │       ├── models.py           # subscriptions
 │       ├── routes/
 │       │   └── billing.py
 │       └── services/
 │           ├── stripe_service.py
 │           └── tier_manager.py
 │
 └── infra/
    ├── traefik/
    │   └── traefik.yml
    ├── keys/
    │   ├── jwt_private.pem     # Solo auth-service
    │   └── jwt_public.pem      # Tutti i servizi
    └── alembic/                # Migrazioni condivise o per-servizio
 ```
 ---
 ## 5. Docker Compose — Configurazione MVP
 ```yaml
 # docker-compose.yml
 services:
  # ══════════════════════════════════════════════════════════
  # API Gateway
  # ══════════════════════════════════════════════════════════
  traefik:
    image: traefik:v3.2
    command:
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:80"
      - "--entrypoints.websecure.address=:443"
      - "--entrypoints.web.http.redirections.entrypoint.to=websecure"
    ports:
      - "80:80"
      - "443:443"
      - "8080:8080"   # Dashboard Traefik (disabilitare in prod)
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./infra/certs:/certs:ro
    restart: unless-stopped
  # ══════════════════════════════════════════════════════════
  # Auth Service (2 repliche)
  # ══════════════════════════════════════════════════════════
  auth-service:
    build: ./auth-service
    deploy:
      replicas: 2
    env_file: .env
    environment:
      DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/adiuva
      REDIS_URL: redis://redis:6379
      JWT_PRIVATE_KEY_FILE: /run/secrets/jwt_private_key
      SERVICE_NAME: auth
    secrets:
      - jwt_private_key
      - jwt_public_key
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.auth.rule=PathPrefix(`/api/v1/auth`)"
      - "traefik.http.services.auth.loadbalancer.server.port=8000"
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
  # ══════════════════════════════════════════════════════════
  # Chat Service — Real-time WS + Chat (scalabile)
  # ══════════════════════════════════════════════════════════
  chat-service:
    build: ./chat-service
    deploy:
      replicas: 2
    env_file: .env
    environment:
      DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/adiuva
      REDIS_URL: redis://redis:6379
      JWT_PUBLIC_KEY_FILE: /run/secrets/jwt_public_key
      SERVICE_NAME: chat
    secrets:
      - jwt_public_key
    labels:
      - "traefik.enable=true"
      # REST chat endpoint
      - "traefik.http.routers.chat.rule=PathPrefix(`/api/v1/chat`)"
      - "traefik.http.services.chat.loadbalancer.server.port=8000"
      # WebSocket route con sticky session
      - "traefik.http.routers.ws.rule=PathPrefix(`/api/v1/ws`)"
      - "traefik.http.routers.ws.service=chat-ws"
      - "traefik.http.services.chat-ws.loadbalancer.server.port=8000"
      - "traefik.http.services.chat-ws.loadbalancer.sticky.cookie.name=ws_affinity"
      - "traefik.http.services.chat-ws.loadbalancer.sticky.cookie.httpOnly=true"
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
  # ══════════════════════════════════════════════════════════
  # Agent Service — Batch processing (scalabile indipendentemente)
  # ══════════════════════════════════════════════════════════
  agent-service:
    build: ./agent-service
    deploy:
      replicas: 2
    env_file: .env
    environment:
      DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/adiuva
      REDIS_URL: redis://redis:6379
      JWT_PUBLIC_KEY_FILE: /run/secrets/jwt_public_key
      SERVICE_NAME: agent
    secrets:
      - jwt_public_key
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.agents.rule=PathPrefix(`/api/v1/agents`)"
      - "traefik.http.services.agents.loadbalancer.server.port=8000"
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
  # ══════════════════════════════════════════════════════════
  # Billing Service (1 replica)
  # ══════════════════════════════════════════════════════════
  billing-service:
    build: ./billing-service
    deploy:
      replicas: 1
    env_file: .env
    environment:
      DATABASE_URL: postgresql+asyncpg://postgres:postgres@db:5432/adiuva
      REDIS_URL: redis://redis:6379
      JWT_PUBLIC_KEY_FILE: /run/secrets/jwt_public_key
      SERVICE_NAME: billing
    secrets:
      - jwt_public_key
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.billing.rule=PathPrefix(`/api/v1/billing`)"
      - "traefik.http.services.billing.loadbalancer.server.port=8000"
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
  # ══════════════════════════════════════════════════════════
  # Infrastruttura
  # ══════════════════════════════════════════════════════════
  db:
    image: pgvector/pgvector:pg16
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: adiuva
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
  redis:
    image: redis:7-alpine
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5
    restart: unless-stopped
  qdrant:
    image: qdrant/qdrant:latest
    volumes:
      - qdrant_data:/qdrant/storage
    restart: unless-stopped
 secrets:
  jwt_private_key:
    file: ./infra/keys/jwt_private.pem
  jwt_public_key:
    file: ./infra/keys/jwt_public.pem
 volumes:
  postgres_data:
  redis_data:
  qdrant_data:
 ```
 ---
 ## 6. Configurazione Cloudflare + VPS
 ### 6.1 DNS
 ```
 api.tuodominio.com  →  A record  →  IP del VPS
                    →  Proxy: ON (orange cloud)
 ```
 ### 6.2 Cloudflare Settings
 | Setting | Valore | Motivo |
 |---------|--------|--------|
 | SSL/TLS mode | **Full (Strict)** | Cloudflare ↔ VPS con certificato valido |
 | WebSocket | **ON** | Necessario per `/api/v1/ws/device` |
 | Proxy timeout | **100s** (default; aumentabile solo con piano Enterprise) | Le LLM calls possono durare 30s+ |
 | Under Attack Mode | Off (attivare se necessario) | |
 ### 6.3 TLS sul VPS
 Due opzioni:
 - **Opzione A (consigliata)**: Cloudflare Origin Certificate → montato in Traefik
 - **Opzione B**: Let's Encrypt via Traefik (con DNS challenge Cloudflare)
 ```yaml
 # traefik.yml — con Cloudflare Origin Certificate
 entryPoints:
  websecure:
    address: ":443"
 tls:
  certificates:
    - certFile: /certs/origin.pem
      keyFile: /certs/origin-key.pem
 ```
 ### 6.4 Rete VPS
 ```bash
 # UFW firewall — solo Cloudflare può raggiungere le porte 80/443
 # https://www.cloudflare.com/ips/
 ufw default deny incoming
 ufw allow from 173.245.48.0/20 to any port 443
 ufw allow from 103.21.244.0/22 to any port 443
 # ... (tutti gli IP range di Cloudflare)
 ufw allow ssh
 ufw enable
 ```
 ---
 ## 7. Comunicazione Inter-Servizio
 ### 7.1 Redis Pub/Sub — Event Bus
 ```
 ┌──────────┐  tier_changed:user_123   ┌──────────┐
 │ Billing  │ ────────────────────────► │   Auth   │
 │ Service  │                           │ Service  │
 └──────────┘                           └──────────┘
 ┌──────────┐  tool_call:user_123      ┌──────────┐
 │  Agent   │ ────────────────────────► │   Chat   │
 │ Service  │                           │ Service  │
 │ (batch)  │ ◄────────────────────────│ (ha WS)  │
 └──────────┘  tool_result:{call_id}    └──────────┘
 ```
 ### 7.2 Health Checks e Service Discovery
 Traefik gestisce automaticamente il service discovery via Docker labels. I servizi non devono conoscersi tra loro — comunicano solo via:
 - **Redis pub/sub** (tool-call cross-instance, tier events)
 - **Redis hash** (stato condiviso: `ws:connections`, rate-limit counters)
 - **PostgreSQL** (dati persistenti condivisi)
 ---
 ## 8. Piano di Migrazione Incrementale (MVP)
 ### Fase 1 — Preparazione (nel monolite attuale)
 1. Aggiungere Redis al `docker-compose.yml` attuale
 2. Migrare JWT da HS256 → RS256 (backward-compatible: accetta entrambi per un periodo)
 3. Implementare `RedisDeviceManager` come drop-in replacement del singleton in-memory
 4. Estrarre `shared/` con auth verification, schemas, middleware
 ### Fase 2 — Auth Service (primo split)
 1. Estrarre `auth.py` routes + models in `auth-service/`
 2. Verificare che i JWT firmati da `auth-service` vengano validati dal monolite
 3. Aggiungere Traefik e routare `/api/v1/auth/*` al nuovo servizio
 4. Il monolite continua a servire tutto il resto
 ### Fase 3 — Billing Service
 1. Estrarre billing routes, Stripe service, tier manager
 2. Configurare Redis pub/sub per `tier_changed` events
 3. Routare via Traefik
 ### Fase 4 — Split Chat + Agent (il più delicato)
 1. Il monolite residuo contiene WS + chat + agents
 2. Separare Agent Service: estrarre `agent_runner`, `agent_registry`, `agent_setup`, route `/agents/*`
 3. Implementare `redis_executor.py` nell'Agent Service per tool-call via Redis
 4. Il Chat Service resta proprietario della WS e sottoscrive i canali `tool_call:{user_id}`
 5. Testare: trigger agent dall'Agent Service → tool_call via Redis → Chat Service → WS → device → risposta
 ### Fase 5 — Scaling test
 1. Scalare Chat Service a 2 repliche, verificare sticky sessions
 2. Scalare Agent Service a 2 repliche, verificare batch processing distribuito
 3. Monitoring (Prometheus + Grafana) per ogni servizio
 ---
 ## 9. Monitoraggio e Logging
 ```yaml
 # Aggiungere al docker-compose.yml
  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
    restart: unless-stopped
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    restart: unless-stopped
  loki:
    image: grafana/loki:latest
    restart: unless-stopped
 ```
 Ogni servizio espone `/metrics` (Prometheus) e scrive log strutturati (JSON) raccolti da Loki.
 ---
 ## 10. Sizing VPS Minimo Consigliato (MVP)
 | Componente | CPU | RAM | Note |
 |---|---|---|---|
 | Traefik | 0.25 | 128MB | |
 | Auth Service ×2 | 0.25 ×2 | 128MB ×2 | Stateless, leggero |
 | Chat Service ×2 | 1.0 ×2 | 1GB ×2 | WS + streaming LLM |
 | Agent Service ×2 | 0.75 ×2 | 512MB ×2 | Batch LLM, CPU-bound |
 | Billing Service | 0.25 | 128MB | |
 | PostgreSQL | 1.0 | 1GB | |
 | Redis | 0.25 | 256MB | |
 | Qdrant | 0.5 | 512MB | |
 | **Totale MVP** | **~6.25 vCPU** | **~5.25 GB** | |
 **Raccomandazione**: VPS con **8 vCPU / 16 GB RAM** per avere margine. Hetzner CPX41 (~€30/mese) o equivalente. Senza Storage/Plugin si risparmia ~1 vCPU e 512MB rispetto alla versione completa.
 ---
 ## Riepilogo Architettura MVP
 | Servizio | Repliche | Proprietario di |
 |---|---|---|
 | **Traefik** | 1 | Routing, TLS, sticky sessions |
 | **Auth Service** | 2 | JWT RS256, registrazione, login, profilo |
 | **Chat Service** | 2–N | WebSocket, home/floating chat, streaming |
 | **Agent Service** | 2–N | Batch processing, directory scan, agent setup |
 | **Billing Service** | 1 | Stripe, subscriptions, tier management |
 | Decisione | Scelta | Motivazione |
 |---|---|---|
 | API Gateway | Traefik | Nativo Docker, WebSocket support, service discovery automatico |
 | JWT | RS256 (asimmetrico) | Verifica distribuita senza contattare Auth Service |
 | Tier check | Claim nel JWT | Ogni servizio verifica localmente, zero roundtrip |
 | WebSocket scaling | Redis pub/sub + sticky cookies | Cross-instance tool-call routing |
 | Chat ↔ Agent split | Servizi separati | Batch CPU-bound non impatta real-time chat |
 | Agent → Device comms | Redis pub/sub via Chat Service | Agent non possiede la WS, usa un relay |
 | Rate limiting | Redis contatori distribuiti | Sliding window condivisa tra repliche |
 | Database | PostgreSQL condiviso | Semplicità MVP; split DB futuro facile |
 | TLS | Cloudflare Origin Certificate | Zero maintenance |
 | Orchestrazione | Docker Compose | Sufficiente per un singolo VPS |
 | Storage / Plugin | Post-MVP | Non critici per il lancio |
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,43 @@
 fastapi>=0.115.0
 uvicorn[standard]>=0.34.0
 gunicorn>=22.0.0
 langchain>=0.3.0
 langchain-openai>=0.3.0
 langchain-litellm>=0.1.0
 litellm>=1.50.0
 pydantic>=2.10.0
 pydantic-settings>=2.7.0
 python-jose[cryptography]>=3.3.0
 stripe>=11.0.0
 boto3>=1.35.0
 slowapi>=0.1.9
 sqlalchemy>=2.0.0
 asyncpg>=0.30.0
 alembic>=1.14.0
 bcrypt>=4.2.0
 python-dotenv>=1.0.0
 httpx>=0.28.0
 websockets>=14.0
 psycopg2-binary>=2.9.0
 pytest>=8.0.0
 pytest-asyncio>=0.24.0
 aiosqlite>=0.20.0
 moto[s3]>=5.0.0
 pinecone>=5.0.0
 qdrant-client>=1.7.0
 croniter>=3.0.0
 google-api-python-client>=2.130.0
 google-auth>=2.29.0
 google-auth-oauthlib>=1.2.0
 google-auth-httplib2>=0.2.0
 msal>=1.28.0
 cryptography>=42.0.0
 pgvector>=0.2.5
 langfuse>=3.3.1
 beautifulsoup4>=4.12.0
 lxml>=5.0.0
 PyYAML>=6.0.0
 apscheduler>=3.10.0
 ruff>=0.8.0
 pypdf>=4.0
 python-docx>=1.1
--- a/results.xml
+++ b/results.xml
--- a/services/auth/.env.example
+++ b/services/auth/.env.example
@@ -1,19 +0,0 @@
 # ── Auth Service ──────────────────────────────────────────────────────────────
 # This file contains env vars specific to the Auth Service.
 # Shared vars (DATABASE_URL, REDIS_URL, etc.) come from the root .env
 # or from docker-compose environment.
 # ── JWT RS256 Keys ────────────────────────────────────────────────────────────
 # Generate keypair:
 #   openssl genpkey -algorithm RSA -out private.pem -pkeyopt rsa_keygen_bits:2048
 #   openssl rsa -in private.pem -pubout -out public.pem
 #
 # Paste PEM content with literal \n for newlines:
 #   JWT_PRIVATE_KEY=-----BEGIN PRIVATE KEY-----\nMIIEvQ...
 #   JWT_PUBLIC_KEY=-----BEGIN PUBLIC KEY-----\nMIIBIj...
 # PRIVATE KEY — used to SIGN JWTs. NEVER share outside this service.
 JWT_PRIVATE_KEY=
 # PUBLIC KEY — used to VERIFY JWTs.
 JWT_PUBLIC_KEY=
--- a/services/auth/Dockerfile
+++ b/services/auth/Dockerfile
@@ -1,36 +0,0 @@
 # ── builder ──────────────────────────────────────────────────────────────────
 FROM python:3.12-slim AS builder
 WORKDIR /build
 # Install shared + service deps in one layer
 COPY services/auth/requirements.txt ./requirements.txt
 RUN pip install --upgrade pip && \
    pip install --no-cache-dir --prefix=/install -r requirements.txt
 # ── runtime ──────────────────────────────────────────────────────────────────
 FROM python:3.12-slim AS runtime
 RUN addgroup --system appgroup && adduser --system --ingroup appgroup appuser
 WORKDIR /app
 COPY --from=builder /install /usr/local
 # Copy shared module (available to all services)
 COPY shared/ shared/
 # Copy service source
 COPY services/auth/app/ app/
 RUN chown -R appuser:appgroup /app
 USER appuser
 EXPOSE 8000
 CMD ["gunicorn", "app.main:app", \
     "-k", "uvicorn.workers.UvicornWorker", \
     "--bind", "0.0.0.0:8000", \
     "--workers", "2", \
     "--timeout", "30"]
--- a/services/auth/README.md
+++ b/services/auth/README.md
@@ -1,16 +0,0 @@
 # Auth Service
 Owns: user registration, login, JWT RS256 issuance, token refresh, `/me` endpoint.
 ## Tables owned
 - `users`
 - `refresh_tokens`
 - `subscriptions` (read; Billing Service writes)
 ## Endpoints
 - `POST /auth/register`
 - `POST /auth/login`
 - `POST /auth/refresh`
 - `GET /auth/me`
 - `PUT /auth/me`
 - `GET /auth/verify` (ForwardAuth for Traefik)
--- a/services/auth/app/config.py
+++ b/services/auth/app/config.py
@@ -1,34 +0,0 @@
 """Auth Service — local configuration.
 Contains secrets that ONLY the Auth Service needs (e.g., JWT private key).
 These are NOT in shared/config.py to prevent other services from accessing them.
 """
 from pydantic import field_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 class AuthSettings(BaseSettings):
    # Settings private to the Auth Service, read from the environment / `.env`.
    #
    # RS256 private key in PEM form; signs JWTs and stays inside this service.
    # Create it with:
    #   openssl genpkey -algorithm RSA -out private.pem -pkeyopt rsa_keygen_bits:2048
    # and export it with newlines written as a literal \n:
    #   JWT_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEv..."
    JWT_PRIVATE_KEY: str = ""
    # RS256 public key in PEM form; verifies JWTs. Derive it from the private key:
    #   openssl rsa -in private.pem -pubout -out public.pem
    JWT_PUBLIC_KEY: str = ""
    @field_validator("JWT_PRIVATE_KEY", "JWT_PUBLIC_KEY", mode="before")
    @classmethod
    def _expand_pem_newlines(cls, v: str) -> str:
        """Replace literal backslash-n pairs with real newlines; pass non-strings through."""
        if not isinstance(v, str):
            return v
        # A string without the two-character sequence comes back unchanged.
        return v.replace(r"\n", "\n")
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
 auth_settings = AuthSettings()
--- a/services/auth/app/deps.py
+++ b/services/auth/app/deps.py
@@ -1,69 +0,0 @@
 """Auth dependencies — JWT validation for the Auth Service.
 This is the canonical get_current_user used by protected endpoints
 within the Auth Service itself (/me, /me PUT).
 """
 from __future__ import annotations
 from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import JWTError, jwt
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from shared.config import settings
 from shared.db import get_session
 from shared.models import Subscription, User
 from shared.schemas import UserProfile
 from app.config import auth_settings
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/auth/login")
 async def get_current_user(
    token: str = Depends(oauth2_scheme),
    db: AsyncSession = Depends(get_session),
 ) -> UserProfile:
    """Resolve a Bearer JWT into the authenticated `UserProfile`.
    The token is verified with the RS256 public key and must carry `sub` and
    `email`. The tier is read from the subscriptions table on every call, and
    name/surname from the users table.
    Raises HTTPException 401 when verification fails or a claim is missing.
    """
    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        claims = jwt.decode(
            token, auth_settings.JWT_PUBLIC_KEY, algorithms=["RS256"]
        )
    except JWTError:
        raise unauthorized
    user_id: str | None = claims.get("sub")
    email: str | None = claims.get("email")
    if not (user_id and email):
        raise unauthorized
    # Tier comes from the database rather than the token claim
    tier_query = select(Subscription.tier).where(Subscription.user_id == user_id)
    stored_tier = (await db.execute(tier_query)).scalar_one_or_none()
    fallback_tier = "power" if settings.ENV == "dev" else "free"
    tier: str = stored_tier or fallback_tier
    # Name and surname are optional; a missing row yields None for both
    names_query = select(User.name, User.surname).where(User.id == user_id)
    names = (await db.execute(names_query)).one_or_none()
    name = surname = None
    if names:
        name, surname = names.name, names.surname
    return UserProfile(
        id=user_id,
        email=email,
        name=name,
        surname=surname,
        tier=tier,
    )  # type: ignore[arg-type]
--- a/services/auth/app/main.py
+++ b/services/auth/app/main.py
@@ -1,62 +0,0 @@
 """Auth Service — JWT issuance, user management, ForwardAuth verification.
 Standalone FastAPI service extracted from the adiuva-api monolith.
 Owns: users, refresh_tokens, subscriptions (read).
 """
 import sys
 from contextlib import asynccontextmanager
 from pathlib import Path
 # Ensure the repo root is on sys.path so "shared" is importable.
 # In Docker, COPY shared/ puts it at /app/shared/ (already importable).
 # In local dev, we need to add the repo root (parents[3]: app/ -> auth/ -> services/ -> repo root).
 _repo_root = str(Path(__file__).resolve().parents[3])
 if _repo_root not in sys.path:
    sys.path.insert(0, _repo_root)
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from shared.config import settings
@asynccontextmanager
 async def lifespan(app: FastAPI):
    yield
    from shared.db import engine
    await engine.dispose()
 def create_app() -> FastAPI:
    """Build the Auth Service application.
    Configures CORS from the shared settings, mounts the auth and verify routers
    under `/api/v1`, and adds a health endpoint. Interactive docs are served at
    `/docs` only when `settings.ENV` is "dev".
    """
    docs_path = "/docs" if settings.ENV == "dev" else None
    api = FastAPI(
        title="Adiuva Auth Service",
        version="0.1.0",
        docs_url=docs_path,
        redoc_url=None,
        lifespan=lifespan,
    )
    cors_options = {
        "allow_origins": settings.CORS_ORIGINS,
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    api.add_middleware(CORSMiddleware, **cors_options)
    from app.routes import router
    from app.verify import router as verify_router
    for mounted in (router, verify_router):
        api.include_router(mounted, prefix="/api/v1")
    @api.get("/api/v1/health", tags=["health"])
    async def health() -> dict:
        return {"status": "ok", "service": "auth", "version": api.version}
    return api
 app = create_app()
--- a/services/auth/app/routes.py
+++ b/services/auth/app/routes.py
@@ -1,249 +0,0 @@
 """Auth routes: register, login, refresh, me.
 Extracted from app/api/routes/auth.py — uses shared.* imports instead of app.*.
 """
 from __future__ import annotations
 import hashlib
 import time
 import uuid
 from datetime import datetime, timedelta, timezone
 import bcrypt
 from cryptography.fernet import Fernet
 from fastapi import APIRouter, Depends, HTTPException, status
 from jose import jwt
 from pydantic import BaseModel
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from shared.config import settings
 from shared.db import get_session
 from shared.models import RefreshToken, Subscription, User
 from shared.schemas import AuthTokens, UserProfile
 from app.config import auth_settings
 from app.deps import get_current_user
 router = APIRouter(prefix="/auth", tags=["auth"])
 # ── Internal helpers ─────────────────────────────────────────────────
 def _hash_password(password: str) -> str:
    """Return a bcrypt hash of `password`, salted with a fresh `gensalt()` value."""
    raw = password.encode()
    hashed = bcrypt.hashpw(raw, bcrypt.gensalt())
    return hashed.decode()
 def _verify_password(password: str, hashed: str) -> bool:
    """Return whether `password` matches the stored bcrypt hash `hashed`."""
    candidate = password.encode()
    stored = hashed.encode()
    return bcrypt.checkpw(candidate, stored)
 def _hash_token(plain_token: str) -> str:
    """Return the hex SHA-256 digest of the plain refresh token string."""
    digest = hashlib.sha256()
    digest.update(plain_token.encode())
    return digest.hexdigest()
 def _make_access_token(user_id: str, email: str, tier: str) -> tuple[str, int]:
    """Sign an RS256 access token and return it with its expiry in milliseconds.
    The claims are `sub`, `email`, `tier`, `exp` and `iat`; the lifetime is
    `settings.JWT_ACCESS_TOKEN_EXPIRE_MINUTES` minutes from now.
    """
    issued_at = int(time.time())
    expires_at = issued_at + settings.JWT_ACCESS_TOKEN_EXPIRE_MINUTES * 60
    claims = dict(sub=user_id, email=email, tier=tier, exp=expires_at, iat=issued_at)
    signed = jwt.encode(claims, auth_settings.JWT_PRIVATE_KEY, algorithm="RS256")
    # The client receives the expiry in milliseconds, the claim stays in seconds
    return signed, expires_at * 1000
 async def _get_live_tier(db: AsyncSession, user_id: str) -> str:
    """Return the user's tier from the subscriptions table.
    Falls back to "power" when `settings.ENV` is "dev" and to "free" otherwise
    if the query yields no (or an empty) tier.
    """
    query = select(Subscription.tier).where(Subscription.user_id == user_id)
    stored_tier = (await db.execute(query)).scalar_one_or_none()
    if stored_tier:
        return stored_tier
    return "power" if settings.ENV == "dev" else "free"
 # ── Request bodies ────────────────────────────────────────────────────
 class _RegisterRequest(BaseModel):
    # Body of POST /auth/register; name and surname are optional.
    email: str
    # Plain-text password; the register route stores only its bcrypt hash.
    password: str
    name: str | None = None
    surname: str | None = None
 class _LoginRequest(BaseModel):
    # Body of POST /auth/login: the credentials checked against the stored hash.
    email: str
    password: str
 class _RefreshRequest(BaseModel):
    # Request body carrying a refresh token string.
    # NOTE(review): presumably the plain token issued by register/login — confirm against the refresh route.
    refresh_token: str
 class _UpdateProfileRequest(BaseModel):
    # Request body with optional profile fields; both default to None.
    # NOTE(review): presumably used by the profile-update route — confirm how None is treated there.
    name: str | None = None
    surname: str | None = None
 # ── Routes ────────────────────────────────────────────────────────────
@router.post("/register", response_model=AuthTokens, status_code=status.HTTP_201_CREATED)
async def register(
    body: _RegisterRequest,
    db: AsyncSession = Depends(get_session),
) -> AuthTokens:
    """Register a new user and issue the first access/refresh token pair.

    Raises:
        HTTPException: 409 if a user with the same email already exists.
    """
    lookup = await db.execute(select(User).where(User.email == body.email))
    if lookup.scalar_one_or_none() is not None:
        raise HTTPException(status.HTTP_409_CONFLICT, "Email already registered")

    new_user = User(
        id=str(uuid.uuid4()),
        email=body.email,
        name=body.name,
        surname=body.surname,
        password_hash=_hash_password(body.password),
        tier="free",
        encryption_key=Fernet.generate_key().decode(),
    )
    db.add(new_user)
    # The user is flushed before the refresh-token row that carries its id is added.
    await db.flush()

    # Only the hash of the refresh token is stored; the plain value goes to the client.
    refresh_plain = str(uuid.uuid4())
    refresh_lifetime = timedelta(days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS)
    refresh_expiry = datetime.now(timezone.utc) + refresh_lifetime
    db.add(
        RefreshToken(
            user_id=new_user.id,
            token_hash=_hash_token(refresh_plain),
            expires_at=refresh_expiry,
        )
    )
    await db.commit()

    signed, signed_expiry_ms = _make_access_token(
        new_user.id, new_user.email, new_user.tier
    )
    return AuthTokens(
        access_token=signed,
        refresh_token=refresh_plain,
        expires_at=signed_expiry_ms,
    )
@router.post("/login", response_model=AuthTokens)
async def login(
    body: _LoginRequest,
    db: AsyncSession = Depends(get_session),
) -> AuthTokens:
    """Check an email/password pair and issue a new access/refresh token pair.

    Raises:
        HTTPException: 401 if no user has that email or the password does
            not verify against the stored hash.
    """
    lookup = await db.execute(select(User).where(User.email == body.email))
    account = lookup.scalar_one_or_none()
    credentials_ok = account is not None and _verify_password(
        body.password, account.password_hash
    )
    if not credentials_ok:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid credentials")

    # The tier claim comes from the subscriptions lookup rather than the user row.
    live_tier = await _get_live_tier(db, account.id)

    refresh_plain = str(uuid.uuid4())
    refresh_lifetime = timedelta(days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS)
    refresh_expiry = datetime.now(timezone.utc) + refresh_lifetime
    db.add(
        RefreshToken(
            user_id=account.id,
            token_hash=_hash_token(refresh_plain),
            expires_at=refresh_expiry,
        )
    )
    await db.commit()

    signed, signed_expiry_ms = _make_access_token(account.id, account.email, live_tier)
    return AuthTokens(
        access_token=signed,
        refresh_token=refresh_plain,
        expires_at=signed_expiry_ms,
    )
@router.post("/refresh", response_model=AuthTokens)
async def refresh(
    body: _RefreshRequest,
    db: AsyncSession = Depends(get_session),
) -> AuthTokens:
    """Exchange a refresh token for a new access/refresh token pair.

    The presented token's row is deleted and a replacement row is stored in
    the same commit.

    Raises:
        HTTPException: 401 if the token is unknown or expired, or if the
            user it belongs to no longer exists.
    """
    presented_hash = _hash_token(body.refresh_token)
    lookup = await db.execute(
        select(RefreshToken).where(RefreshToken.token_hash == presented_hash)
    )
    stored = lookup.scalar_one_or_none()
    now = datetime.now(timezone.utc)
    # The stored expiry is stamped as UTC before it is compared with the aware "now".
    expired_or_missing = (
        stored is None or stored.expires_at.replace(tzinfo=timezone.utc) < now
    )
    if expired_or_missing:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired refresh token")
    await db.delete(stored)

    owner_lookup = await db.execute(select(User).where(User.id == stored.user_id))
    owner = owner_lookup.scalar_one_or_none()
    if owner is None:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")

    # The tier claim of the new JWT comes from the subscriptions lookup.
    live_tier = await _get_live_tier(db, owner.id)

    replacement_plain = str(uuid.uuid4())
    replacement_expiry = now + timedelta(days=settings.JWT_REFRESH_TOKEN_EXPIRE_DAYS)
    db.add(
        RefreshToken(
            user_id=owner.id,
            token_hash=_hash_token(replacement_plain),
            expires_at=replacement_expiry,
        )
    )
    await db.commit()

    signed, signed_expiry_ms = _make_access_token(owner.id, owner.email, live_tier)
    return AuthTokens(
        access_token=signed,
        refresh_token=replacement_plain,
        expires_at=signed_expiry_ms,
    )
@router.get("/me", response_model=UserProfile)
async def me(
    current_user: UserProfile = Depends(get_current_user),
) -> UserProfile:
    """Echo back the profile resolved by the ``get_current_user`` dependency."""
    return current_user
@router.put("/me", response_model=UserProfile)
async def update_profile(
    body: _UpdateProfileRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
) -> UserProfile:
    """Update the authenticated user's name and surname.

    Only the fields of ``body`` that are not ``None`` are written. The
    ``tier`` in the response is copied from ``current_user``; it is not
    re-read from the database.

    Raises:
        HTTPException: 401 if no user row exists for the authenticated id.
    """
    result = await db.execute(select(User).where(User.id == current_user.id))
    user = result.scalar_one_or_none()
    if user is None:
        # scalar_one() raised NoResultFound here, which surfaced as an
        # unhandled server error; respond like the refresh route does for a
        # missing user instead.
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")

    if body.name is not None:
        user.name = body.name
    if body.surname is not None:
        user.surname = body.surname
    await db.commit()
    await db.refresh(user)
    return UserProfile(
        id=user.id,
        email=user.email,
        name=user.name,
        surname=user.surname,
        tier=current_user.tier,
    )
--- a/services/auth/app/verify.py
+++ b/services/auth/app/verify.py
@@ -1,66 +0,0 @@
 """ForwardAuth verification endpoint for Traefik.
 Traefik calls GET /api/v1/auth/verify on every request to a protected
 service.  This endpoint validates the JWT from the Authorization header
 and returns identity headers that Traefik injects into downstream requests.
 Downstream services NEVER validate JWTs themselves — they trust the
 X-User-Id, X-User-Email, X-User-Tier headers injected by Traefik.
 """
 from __future__ import annotations
 from fastapi import APIRouter, Request, Response
 from fastapi import status as http_status
 from jose import JWTError, jwt
 from sqlalchemy import select
 from shared.config import settings
 from shared.db import async_session
 from shared.models import Subscription
 from app.config import auth_settings
 # Router for the ForwardAuth endpoint; it has no prefix, so the route below spells out /auth/verify itself.
 router = APIRouter(tags=["auth"])
@router.get("/auth/verify")
async def verify(request: Request) -> Response:
    """Validate JWT and return identity headers for Traefik ForwardAuth.

    Returns 200 with X-User-* headers on success, 401 on failure.
    Traefik copies response headers to the downstream request.

    A request is rejected with 401 when the Authorization header is missing,
    does not use the Bearer scheme, carries no token, fails RS256
    verification, or lacks a ``sub`` or ``email`` claim.
    """
    unauthorized = http_status.HTTP_401_UNAUTHORIZED

    auth_header = request.headers.get("Authorization", "")
    scheme, _, credentials = auth_header.partition(" ")
    token = credentials.strip()
    # Auth-scheme names are case-insensitive (RFC 9110 section 11.1), so
    # "bearer" and "BEARER" are accepted as well as "Bearer".
    if scheme.lower() != "bearer" or not token:
        return Response(status_code=unauthorized)

    # Keep the try body down to the one call that can raise JWTError.
    try:
        payload = jwt.decode(
            token, auth_settings.JWT_PUBLIC_KEY, algorithms=["RS256"]
        )
    except JWTError:
        return Response(status_code=unauthorized)

    user_id: str | None = payload.get("sub")
    email: str | None = payload.get("email")
    if not user_id or not email:
        return Response(status_code=unauthorized)

    # Live tier lookup from subscriptions table
    default_tier = "power" if settings.ENV == "dev" else "free"
    async with async_session() as db:
        result = await db.execute(
            select(Subscription.tier).where(Subscription.user_id == user_id)
        )
        tier: str = result.scalar_one_or_none() or default_tier

    return Response(
        status_code=http_status.HTTP_200_OK,
        headers={
            "X-User-Id": user_id,
            "X-User-Email": email,
            "X-User-Tier": tier,
        },
    )
--- a/services/auth/requirements.txt
+++ b/services/auth/requirements.txt
@@ -1,11 +0,0 @@
 fastapi>=0.115.0
 uvicorn[standard]>=0.34.0
 gunicorn>=22.0.0
 pydantic>=2.10.0
 pydantic-settings>=2.7.0
 python-jose[cryptography]>=3.3.0
 sqlalchemy>=2.0.0
 asyncpg>=0.30.0
 bcrypt>=4.2.0
 cryptography>=42.0.0
 python-dotenv>=1.0.0
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`"OAuth provider abstractions and utilities."`