From 41db3a7089e7f42503d66327007b26fb127630f6 Mon Sep 17 00:00:00 2001
From: Roberto Musso <roberto.musso@hpe.com>
Date: Wed, 8 Apr 2026 23:52:52 +0200
Subject: [PATCH] update env variables, restore agent config tables, clear README

---
 .env.example                                  |   9 +-
 README.md                                     | 591 ------------------
 alembic/versions/003_agent_tables.py          |   2 +-
 ...d1e2f3_add_agent_config_to_local_agents.py |  90 ++-
 app/config/settings.py                        |   3 +-
 app/core/langfuse_client.py                   |   4 +-
 app/core/llm.py                               |  12 +-
 7 files changed, 93 insertions(+), 618 deletions(-)

diff --git a/.env.example b/.env.example
index 576794b..d8d134d 100644
--- a/.env.example
+++ b/.env.example
@@ -16,8 +16,7 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
 GOOGLE_API_KEY=
-LLM_MODEL=gpt-4o
-LLM_ROUTER_MODEL=gpt-4o-mini
+LLM_MODEL=gpt-5-mini
 
 # ── Stripe (leave empty to stub billing) ──────────────────────────────────────
 STRIPE_SECRET_KEY=
@@ -27,9 +26,9 @@ STRIPE_WEBHOOK_SECRET=
 # ── Langfuse (leave empty to disable observability) ───────────────────────────
 LANGFUSE_SECRET_KEY=
 LANGFUSE_PUBLIC_KEY=
-# LANGFUSE_HOST=https://cloud.langfuse.com        # EU (default)
-# LANGFUSE_HOST=https://us.cloud.langfuse.com     # US
-# LANGFUSE_HOST=http://localhost:3000             # Self-hosted
+# LANGFUSE_BASE_URL=https://cloud.langfuse.com        # EU (default)
+# LANGFUSE_BASE_URL=https://us.cloud.langfuse.com     # US
+# LANGFUSE_BASE_URL=http://localhost:3000             # Self-hosted
 
 # ── CORS ──────────────────────────────────────────────────────────────────────
 # Comma-separated list parsed by Settings (override default if needed)
diff --git a/README.md b/README.md
index 1b6c19a..e69de29 100644
--- a/README.md
+++ b/README.md
@@ -1,591 +0,0 @@
-# AdiuvAI Cloud API
-
-**AI-powered project management backend with LLM orchestration and subscription billing.**
-
-Built with FastAPI · Python 3.12 · PostgreSQL · LangChain · Stripe
-
----
-
-## Table of Contents
-
-- [Overview](#overview)
-- [Architecture](#architecture)
-- [Key Features](#key-features)
-- [Tech Stack](#tech-stack)
-- [Getting Started](#getting-started)
-- [Docker Deployment](#docker-deployment)
-- [Environment Variables](#environment-variables)
-- [API Reference](#api-reference)
-- [Data Model](#data-model)
-- [AI Agent System](#ai-agent-system)
-- [Orchestration & Execution Plans](#orchestration--execution-plans)
-- [Middleware](#middleware)
-- [Billing & Tiers](#billing--tiers)
-- [Testing](#testing)
-- [Project Structure](#project-structure)
-- [License](#license)
-
----
-
-## Overview
-
-AdiuvAI Cloud API is the FastAPI backend that powers the **AdiuvAI Electron desktop app**. It provides LLM-powered chat orchestration, text embedding generation, and Stripe-based subscription billing across four tiers.
-
-### Design Principles
-
-1. **Never expose prompts** — system prompts stay server-side; responses are sanitized to strip any leaked prompt fragments.
-2. **Stateless request handling** — all context comes from the client and JWT; no server-side session state.
-3. **Tier gates enforced server-side** — the server always reads the current tier from the database, never trusting client-reported values.
-
----
-
-## Architecture
-
-```
-┌──────────────┐      ┌────────────────────────────────────────────────────────┐
-│  Electron    │      │  FastAPI  (Uvicorn / Gunicorn)                         │
-│  Desktop App │────▶│                                                        │
-│  (Client)    │◀────│  Middleware: RateLimit → Sanitizer → CORS → Router     │
-└──────────────┘      │                                                        │
-                      │  ┌──────────────────┐  ┌────────────────────────────┐  │
-                      │  │  Auth Routes     │  │  Chat Routes               │  │
-                      │  │  Billing Routes  │  │    ↓                       │  │
-                      │  │  Agent Routes    │  │  Orchestrator (GPT-4o-mini)│  │
-                      │  │  Device WS       │  │    ↓ classify intent       │  │
-                      │  └──────────────────┘  │  Agent Registry            │  │
-                      │                        │    ↓                       │  │
-                      │                        │  TaskAgent  | ProjectAgent │  │
-                      │                        │  NoteAgent  | CheckptAgent │  │
-                      │                        │  (GPT-4o + LangChain)      │  │
-                      │                        └────────────────────────────┘  │
-                      └────────────────────────────────────────────────────────┘
-                               │
-                      ┌────────▼───┐
-                      │ PostgreSQL │
-                      │ (Auth,     │
-                      │  Billing,  │
-                      │  Agents)   │
-                      └────────────┘
-                               │
-                      ┌────────▼───┐
-                      │  Stripe    │
-                      │  (Billing) │
-                      └────────────┘
-```
-
----
-
-## Key Features
-
-1. **LLM-powered orchestration** — GPT-4o-mini classifies user intent and routes to the appropriate domain agent.
-2. **4 specialized AI agents** — Tasks (8 tools), Projects (6 tools), Timelines (4 tools), Notes (5 tools), all powered by GPT-4o via LangChain.
-3. **Execution plans & playbooks** — Server-side prompt template registry; clients receive only opaque template IDs, never raw prompts.
-4. **Text embeddings** — Generates text-embedding-3-small vectors for local client-side note search.
-5. **Stripe billing** — Four-tier subscription model (Free / Pro / Power / Team) with checkout sessions and full webhook lifecycle handling.
-6. **JWT authentication** — Access + refresh tokens with bcrypt password hashing, SHA-256 token hashing, and automatic rotation.
-7. **Prompt IP protection** — Sanitizer middleware strips system prompts, reasoning markers, tool schemas, and agent routing metadata from all chat responses.
-8. **Tier-based rate limiting** — Sliding-window per-user limiter scaling from 20 to 200 requests/min by subscription tier.
-9. **WebSocket streaming** — Real-time chat with 30-second heartbeat keep-alive and chunked text delivery.
-10. **Alembic migrations** — Versioned schema management.
-11. **Comprehensive test suite** — In-memory SQLite, per-tier test fixtures, and full API coverage without external dependencies.
-
----
-
-## Tech Stack
-
-| Package | Version | Purpose |
-|---|---|---|
-| `fastapi` | ≥ 0.115.0 | Web framework |
-| `uvicorn[standard]` | ≥ 0.34.0 | ASGI development server |
-| `gunicorn` | ≥ 22.0.0 | Production process manager |
-| `langchain` | ≥ 0.3.0 | LLM orchestration framework |
-| `langchain-openai` | ≥ 0.3.0 | OpenAI LLM provider integration |
-| `litellm` | ≥ 1.50.0 | Universal LLM gateway (100+ providers) |
-| `pydantic` | ≥ 2.10.0 | Data validation and serialization |
-| `pydantic-settings` | ≥ 2.7.0 | Environment-based configuration |
-| `python-jose[cryptography]` | ≥ 3.3.0 | JWT encoding and decoding |
-| `stripe` | ≥ 11.0.0 | Billing and payment integration |
-| `slowapi` | ≥ 0.1.9 | Rate limiting utilities |
-| `sqlalchemy` | ≥ 2.0.0 | Async ORM and query builder |
-| `asyncpg` | ≥ 0.30.0 | PostgreSQL async driver |
-| `alembic` | ≥ 1.14.0 | Database migration management |
-| `bcrypt` | ≥ 4.2.0 | Password hashing |
-| `python-dotenv` | ≥ 1.0.0 | `.env` file loading |
-| `httpx` | ≥ 0.28.0 | Async HTTP client (used in tests) |
-| `websockets` | ≥ 14.0 | WebSocket protocol support |
-| `psycopg2-binary` | ≥ 2.9.0 | Synchronous PostgreSQL driver (Alembic) |
-| `pytest` | ≥ 8.0.0 | Test framework |
-| `pytest-asyncio` | ≥ 0.24.0 | Async test support |
-| `aiosqlite` | ≥ 0.20.0 | In-memory SQLite for tests |
-| `ruff` | ≥ 0.8.0 | Linter and formatter |
-
----
-
-## Getting Started
-
-### Prerequisites
-
-- Python 3.12+
-- PostgreSQL 16+
-- An OpenAI API key (for LLM features)
-- Stripe API keys (optional — billing stubs gracefully when unconfigured)
-
-### Installation
-
-```bash
-# Clone the repository
-git clone <repo-url> && cd adiuvai-api
-
-# Create a virtual environment
-python -m venv .venv && source .venv/bin/activate
-
-# Install dependencies
-pip install -r requirements.txt
-
-# Configure environment
-cp .env.example .env
-# Edit .env with your DATABASE_URL, OPENAI_API_KEY, etc.
-```
-
-### Database Setup
-
-```bash
-# Start PostgreSQL (or use the Docker Compose database)
-docker compose up db -d
-
-# Run migrations
-alembic upgrade head
-```
-
-### Run the Development Server
-
-```bash
-uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
-```
-
-Interactive API docs are available at [http://localhost:8000/docs](http://localhost:8000/docs) in development mode (`ENV=dev`). The `/docs` endpoint is disabled in production.
-
----
-
-## Docker Deployment
-
-### Quick Start
-
-```bash
-docker compose up --build
-```
-
-This starts two services:
-
-- **app** — FastAPI server on port `8000`
-- **db** — PostgreSQL 16 (Alpine) on port `5432` with a persistent volume and health checks
-
-### Dockerfile Details
-
-The Dockerfile uses a multi-stage build:
-
-1. **Builder stage** — Installs Python dependencies into a virtual environment.
-2. **Runtime stage** — Copies only the venv, app source, and Alembic migrations. Runs as a non-root user (`appuser`).
-3. **Production server** — Gunicorn with 4 Uvicorn workers, 120-second timeout, listening on port 8000.
-
-```bash
-# Production command (run by the container)
-gunicorn app.main:app -k uvicorn.workers.UvicornWorker -w 4 --timeout 120 -b 0.0.0.0:8000
-```
-
----
-
-## Homelab / Self-Hosted Deployment
-
-You can run the entire stack locally on a homelab with **no cloud dependencies except the LLM provider**.
-
-### 1. Start all services
-
-```bash
-docker compose up -d
-```
-
-This starts PostgreSQL alongside the app.
-
-### 2. Configure your `.env`
-
-```bash
-# Database (uses the compose PostgreSQL)
-DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/adiuvai
-
-# Billing — leave empty to stub (no Stripe needed)
-STRIPE_SECRET_KEY=
-STRIPE_WEBHOOK_SECRET=
-
-# LLM — the only external service
-OPENAI_API_KEY=sk-...
-LLM_MODEL=gpt-4o
-LLM_ROUTER_MODEL=gpt-4o-mini
-
-# Auth
-JWT_SECRET=your-secret-here
-ENV=dev
-```
-
-### 3. Run migrations
-
-```bash
-docker compose exec app alembic upgrade head
-```
-
-### What runs where
-
-| Service | Runs on | Port | Notes |
-|---|---|---|---|
-| FastAPI app | Docker | 8000 | API server |
-| PostgreSQL | Docker | 5432 | Auth, billing, agents |
-| Stripe | — | — | Stubbed when keys are empty |
-| OpenAI / LLM | Cloud | — | Only external dependency |
-
-> **Want fully offline AI too?** Set `LLM_MODEL=ollama/llama3` and `LLM_ROUTER_MODEL=ollama/llama3`, then add an Ollama container or point at a local Ollama instance. See the [LLM provider switching](#switching-llm-providers) section.
-
----
-
-## Environment Variables
-
-All variables are loaded from a `.env` file via Pydantic Settings. Source: `app/config/settings.py`
-
-| Variable | Type | Default | Description |
-|---|---|---|---|
-| `DATABASE_URL` | `str` | `postgresql+asyncpg://postgres:postgres@localhost:5432/adiuvai` | Async SQLAlchemy connection string |
-| `JWT_SECRET` | `str` | `change-me-in-production` | HMAC secret for JWT signing |
-| `JWT_ALGORITHM` | `str` | `HS256` | JWT signing algorithm |
-| `JWT_ACCESS_TOKEN_EXPIRE_MINUTES` | `int` | `30` | Access token time-to-live |
-| `JWT_REFRESH_TOKEN_EXPIRE_DAYS` | `int` | `30` | Refresh token time-to-live |
-| `STRIPE_SECRET_KEY` | `str` | `""` | Stripe API key (empty = stub mode) |
-| `STRIPE_WEBHOOK_SECRET` | `str` | `\"\"` | Stripe webhook signature secret |\n| `OPENAI_API_KEY` | `str` | `\"\"` | OpenAI key for LLM agent calls |
-| `LLM_MODEL` | `str` | `gpt-4o` | LiteLLM model identifier for agents (e.g. `anthropic/claude-3.5-sonnet`, `gemini/gemini-pro`, `ollama/llama3`) |
-| `LLM_ROUTER_MODEL` | `str` | `gpt-4o-mini` | Lighter model used for intent classification / routing |
-| `CORS_ORIGINS` | `list[str]` | `["app://.", "http://localhost:3000", "http://localhost:5173"]` | Allowed CORS origins |
-| `ENV` | `Literal` | `dev` | `dev` or `prod` — controls `/docs` visibility and SQL echo |
-
----
-
-## API Reference
-
-All routes are prefixed with `/api/v1`. **27 endpoints** total (25 REST + 1 WebSocket + 1 health check).
-
-### Health
-
-| Method | Path | Auth | Description |
-|---|---|---|---|
-| `GET` | `/api/v1/health` | No | Returns `{"status": "ok", "version": "0.1.0"}` |
-
-### Auth
-
-| Method | Path | Auth | Description |
-|---|---|---|---|
-| `POST` | `/api/v1/auth/register` | No | Create account with bcrypt-hashed password, returns `AuthTokens` |
-| `POST` | `/api/v1/auth/login` | No | Validate credentials, returns `AuthTokens` |
-| `POST` | `/api/v1/auth/refresh` | No | Rotate refresh token, returns new `AuthTokens` |
-| `GET` | `/api/v1/auth/me` | JWT | Returns `UserProfile` for the authenticated user |
-
-### Chat
-
-| Method | Path | Auth | Description |
-|---|---|---|---|
-| `POST` | `/api/v1/chat` | JWT | Route message through the orchestrator; returns `ChatResponse` or `ExecutionPlan` depending on execution mode |
-| `POST` | `/api/v1/chat/embed` | JWT | Generate a 1536-dim text embedding vector (`text-embedding-3-small`). Used by Electron for local note search. |
-| `WS` | `/api/v1/chat/stream` | JWT (query param `?token=`) | Streaming chat — first frame is a `ChatRequest`, server yields text chunks, final frame is `{"done": true, "response": "...", "actions": [...]}`. 30-second heartbeat ping. |
-
-### Plans
-
-| Method | Path | Auth | Description |
-|---|---|---|---|
-| `GET` | `/api/v1/plans/playbook` | JWT | List all cached execution plan playbooks |
-| `GET` | `/api/v1/plans/playbook/{plan_id}` | JWT | Retrieve a specific playbook by ID |
-
-### Billing
-
-| Method | Path | Auth | Description |
-|---|---|---|---|
-| `POST` | `/api/v1/billing/checkout` | JWT | Create a Stripe checkout session, returns `{"checkout_url": "..."}` |
-| `POST` | `/api/v1/billing/webhook` | Stripe signature | Handle Stripe events: `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, `invoice.payment_failed` |
-| `GET` | `/api/v1/billing/subscription` | JWT | Get current subscription information |
-| `DELETE` | `/api/v1/billing/subscription` | JWT | Cancel subscription and revert to free tier |
-
----
-
-## Data Model
-
-3 tables managed by Alembic migrations. Source: `app/models.py`
-
-### Tables
-
-| Table | Primary Key | Key Columns | Purpose |
-|---|---|---|---|
-| `users` | `id` (UUID) | `email` (unique), `password_hash`, `tier`, `stripe_customer_id`, timestamps | User accounts |
-| `refresh_tokens` | `id` (UUID) | `user_id` (FK), `token_hash` (SHA-256, unique), `expires_at` | Hashed refresh tokens for rotation |
-| `subscriptions` | `id` (UUID) | `user_id` (FK, unique), `stripe_subscription_id`, `tier`, `status`, `current_period_end` | Stripe subscription records |
-
-### Enum Types
-
-| Enum | Values |
-|---|---|
-| `billing_tier` | `free`, `pro`, `power`, `team` |
-
-### Migrations
-
-| Version | Description |
-|---|---|
-| `001_initial_schema` | Creates core auth and billing tables with indexes and foreign key constraints |
-
----
-
-## AI Agent System
-
-The agent system uses a registry pattern with LangChain tool-calling agents powered by GPT-4o. Source: `app/agents/`, `app/core/agent_registry.py`
-
-### Architecture
-
-- **`BaseAgent`** — Abstract base with `user_id` and `shared_memory`.
-- **`ChatAgent(BaseAgent)`** — Abstract `handle(query, context)` and `get_tools()` methods, plus a shared `_tool_loop(llm, messages, tools, max_iter=5)` for iterative tool calling.
-- **`AgentRegistry`** — Singleton registry with `@register` decorator, `get(name)`, `list_agents()`, and `call_agent(name, query, context)`.
-
-### Registered Agents
-
-| Agent | Registry Name | Tools | Description |
-|---|---|---|---|
-| **TaskAgent** | `task_agent` | 8 | Full task and comment CRUD. Status: `todo` / `in_progress` / `done`. Priority: `high` / `medium` / `low`. Tools: `list_tasks`, `create_task`, `update_task`, `delete_task`, `list_tasks_due_today`, `list_task_comments`, `add_task_comment`, `delete_task_comment` |
-| **ProjectAgent** | `project_agent` | 6 | Project lifecycle management. Status: `active` / `archived`. Prefers archiving over deletion. Tools: `list_projects`, `list_all_projects`, `get_project`, `create_project`, `update_project`, `delete_project` |
-| **TimelineAgent** | `timeline_agent` | 4 | Project milestones. Requires `project_id` for creation. Supports AI-suggestion and approval workflows. Tools: `list_timelines`, `create_timeline`, `update_timeline`, `delete_timeline` |
-| **NoteAgent** | `note_agent` | 5 | Markdown note management. Optionally linked to projects. Tools: `list_notes`, `get_note`, `create_note`, `update_note`, `delete_note` |
-
-All agents use the model configured by `LLM_MODEL` (default: GPT-4o) with `temperature=0` via LiteLLM. Tools return JSON action descriptors that the Electron client interprets and applies locally.
-
-### Switching LLM Providers
-
-The backend uses **LiteLLM** as a universal LLM gateway. All agents and the orchestrator instantiate models through a centralized factory in `app/core/llm.py`. To switch providers, change environment variables — no code changes required:
-
-```bash
-# OpenAI (default)
-LLM_MODEL=gpt-4o
-LLM_ROUTER_MODEL=gpt-4o-mini
-
-# Anthropic
-LLM_MODEL=anthropic/claude-3.5-sonnet
-LLM_ROUTER_MODEL=anthropic/claude-3-haiku
-
-# Google Gemini
-LLM_MODEL=gemini/gemini-pro
-LLM_ROUTER_MODEL=gemini/gemini-flash
-
-# Local Ollama
-LLM_MODEL=ollama/llama3
-LLM_ROUTER_MODEL=ollama/llama3
-
-# AWS Bedrock
-LLM_MODEL=bedrock/anthropic.claude-v2
-LLM_ROUTER_MODEL=bedrock/anthropic.claude-instant-v1
-```
-
-See the [LiteLLM provider docs](https://docs.litellm.ai/docs/providers) for the full list of 100+ supported providers and model naming conventions.
-
----
-
-## Orchestration & Execution Plans
-
-Source: `app/core/orchestrator.py`, `app/core/execution_plan.py`
-
-### Orchestrator
-
-1. **`classify_intent(message, context, registry)`** — Uses the router model (`LLM_ROUTER_MODEL`, default: GPT-4o-mini) to determine which agent should handle a message. Falls back to `task_agent` when classification is ambiguous.
-2. **`route_single(agent_name, message, context)`** — Routes to a single agent and returns a `ChatResponse`.
-3. **`route_pipeline(agent_names, message, context)`** — Executes agents sequentially; each receives `previous_results` from earlier agents. A final LLM synthesis step merges all results.
-4. **`orchestrate(request)`** — Main entry point. In `direct` mode, returns a `ChatResponse`. In `plan` mode, returns an `ExecutionPlan`.
-5. **`orchestrate_stream(request)`** — Streaming variant that yields 50-character text chunks with a final JSON frame.
-
-### Execution Plans
-
-- **`PromptTemplateRegistry`** — Maps template IDs to server-side prompt text. Clients only ever see opaque IDs, never raw prompts.
-- **`ExecutionPlanBuilder`** — Fluent builder API: `add_step()`, `add_llm_step(template_id, vars)`, `add_data_step(action, data_from_step)`. Validates step references on `build()`.
-- **`PlanCache`** — LRU cache (maxsize 1000) for storing plans as reusable playbooks.
-
-### Built-in Templates (6)
-
-`tpl_task_agent_default`, `tpl_timeline_agent_default`, `tpl_project_agent_default`, `tpl_note_agent_default`, `tpl_task_extract_from_project`, `tpl_note_weekly_summary`
-
-### Built-in Playbooks (2)
-
-| Playbook | Description |
-|---|---|
-| `create_tasks_from_project` | LLM extracts actionable tasks from project context, then creates task records |
-| `generate_weekly_note` | LLM generates a weekly summary, then creates a note record |
-
----
-
-## Middleware
-
-Middleware executes in this order on each request: **TierRateLimit → Sanitizer → CORS → Router**
-
-### JWT Authentication
-
-Source: `app/api/middleware/auth.py`
-
-- FastAPI dependency `get_current_user` validates the `Bearer` JWT and extracts `user_id` and `email`.
-- **Live tier lookup** — The current tier is fetched from the `subscriptions` table on every request (not cached in the JWT), so upgrades and downgrades take immediate effect.
-- Falls back to `free` when no subscription row exists.
-- Raises `401 Unauthorized` on invalid or expired tokens.
-- **Exempt paths:** `/api/v1/auth/register`, `/api/v1/auth/login`, `/api/v1/billing/webhook`
-
-### Tier-Based Rate Limiter
-
-Source: `app/api/middleware/rate_limit.py`
-
-- `TierRateLimitMiddleware` — Sliding-window in-process rate limiter (no Redis dependency).
-- Per-user 60-second window sized by subscription tier:
-
-| Tier | Requests / Minute |
-|---|---|
-| Free | 20 |
-| Pro | 60 |
-| Power | 120 |
-| Team | 200 |
-
-- Returns `429 Too Many Requests` with a `Retry-After` header when the limit is exceeded.
-- **Exempt paths:** register, login, webhook, health
-
-### Response Sanitizer
-
-Source: `app/api/middleware/sanitizer.py`
-
-- Runs only on `/api/v1/chat` endpoints.
-- Scans JSON response bodies and replaces leaked prompt IP fragments with `[REDACTED]`.
-- Detects: system prompt openers, agent routing metadata, LangChain tool schemas, internal reasoning markers (`<thinking>`, `[INST]`), and known prompt fingerprints.
-- Logs sanitization events as `WARNING`.
-
----
-
-## Billing & Tiers
-
-Source: `app/billing/stripe_service.py`, `app/billing/tier_manager.py`
-
-### Feature Matrix
-
-| Feature | Free | Pro | Power | Team |
-|---|---|---|---|---|
-| AI Agents | 3 | Unlimited | Unlimited | Unlimited |
-| Batch Active | 2 | 10 | Unlimited | Unlimited |
-| LLM Providers | 1 | Unlimited | Unlimited | Unlimited |
-| Batch Builder | — | — | ✓ | ✓ |
-| SSO | — | — | — | ✓ |
-| Rate Limit | 20 req/min | 60 req/min | 120 req/min | 200 req/min |
-
-### Stripe Integration
-
-- **Checkout** — `create_checkout_session(user_id, tier)` creates a Stripe Checkout session. Returns a stub URL when Stripe is not configured.
-- **Webhooks** — Handles `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, and `invoice.payment_failed`.
-- **Subscription management** — `get_subscription()` returns the current subscription record; `cancel_subscription()` cancels via the Stripe API and reverts the user to the free tier.
-- **Price IDs:** `price_pro_monthly`, `price_power_monthly`, `price_team_monthly`
-
-### Tier Manager
-
-- `get_tier(user_id)` — Returns the user's current billing tier.
-- `check_feature(tier, feature)` — Boolean feature gate check.
-- `require_feature(tier, feature)` — Raises HTTP 403 if the feature is not available.
-
----
-
-## Testing
-
-### Running Tests
-
-```bash
-# Run all tests
-pytest
-
-# Run a specific test file
-pytest tests/test_auth.py
-
-# Run with verbose output
-pytest -v
-```
-
-### Test Infrastructure
-
-- **Database:** Async SQLite in-memory via `aiosqlite` + `StaticPool` — fast, no PostgreSQL needed.
-- **Auth helpers:** `make_jwt(tier)` and `auth_header(tier)` generate per-tier test tokens.
-- **Seed data:** Auto-creates one `User` + `Subscription` per tier (free/pro/power/team) before each test.
-- **FK enforcement:** SQLite `PRAGMA foreign_keys=ON`.
-- **No external dependencies** — all tests run fully offline.
-
-### Test Coverage
-
-| File | Coverage |
-|---|---|
-| `test_auth.py` | Register, login, token access, refresh, expiration |
-| `test_middleware.py` | Rate limiting by tier, sanitizer prompt leak detection |
-
----
-
-## Project Structure
-
-```
-adiuvai-api/
-├── alembic.ini                  # Alembic configuration
-├── docker-compose.yml           # Docker Compose (app + PostgreSQL)
-├── Dockerfile                   # Multi-stage production build
-├── requirements.txt             # Python dependencies
-│
-├── alembic/                     # Database migrations
-│   ├── env.py                   # Alembic environment config
-│   ├── script.py.mako           # Migration template
-│   └── versions/
-│       └── 001_initial_schema.py    # Tables, indexes, FKs
-│
-├── app/                         # Application source
-│   ├── main.py                  # FastAPI app factory, middleware, routes
-│   ├── db.py                    # Async SQLAlchemy engine & session
-│   ├── models.py                # SQLAlchemy ORM models
-│   ├── schemas.py               # Pydantic request/response schemas
-│   │
-│   ├── config/
-│   │   └── settings.py          # Pydantic Settings (env vars)
-│   │
-│   ├── agents/                  # LLM-powered domain agents
-│   │   ├── task_agent.py        # Task & comment CRUD (8 tools)
-│   │   ├── project_agent.py     # Project lifecycle (6 tools)
-│   │   ├── timeline_agent.py    # Milestones (4 tools)
-│   │   └── note_agent.py        # Markdown notes (5 tools)
-│   │
-│   ├── core/                    # Orchestration engine
-│   │   ├── agent_registry.py    # BaseAgent, ChatAgent, AgentRegistry
-│   │   ├── llm.py               # LiteLLM factory (get_llm, get_router_llm)
-│   │   └── deep_agent.py        # Deep agent orchestration
-│   │
-│   ├── api/                     # HTTP layer
-│   │   ├── deps.py              # Shared FastAPI dependencies
-│   │   ├── middleware/
-│   │   │   ├── rate_limit.py    # Sliding-window tier rate limiter
-│   │   │   └── sanitizer.py     # Prompt IP leak protection
-│   │   └── routes/
-│   │       ├── auth.py          # Register, login, refresh, me
-│   │       ├── chat.py          # Chat + embed endpoint
-│   │       ├── billing.py       # Stripe checkout, webhooks, subscription
-│   │       ├── agents.py        # Agent catalog, config, runs
-│   │       └── device_ws.py     # Persistent device WebSocket
-│   │
-│   └── billing/
-│       ├── stripe_service.py    # Stripe API wrapper
-│       └── tier_manager.py      # Feature matrix, rate limits
-│
-└── tests/                       # Test suite
-    ├── conftest.py              # Fixtures: DB, auth, seeds
-    ├── test_auth.py
-    ├── test_orchestrator.py
-    ├── test_agents.py
-    ├── test_agent_registry.py
-    ├── test_execution_plan.py
-    └── test_middleware.py
-```
-
----
-
-## License
-
-*To be determined.*
diff --git a/alembic/versions/003_agent_tables.py b/alembic/versions/003_agent_tables.py
index 1e503c8..455f03b 100644
--- a/alembic/versions/003_agent_tables.py
+++ b/alembic/versions/003_agent_tables.py
@@ -14,7 +14,7 @@ from alembic import op
 from sqlalchemy.dialects import postgresql
 
 revision: str = "003"
-down_revision: Union[str, None] = "002"
+down_revision: Union[str, None] = "001"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
diff --git a/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py b/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py
index f56b18e..60a9b96 100644
--- a/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py
+++ b/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py
@@ -1,4 +1,8 @@
-"""add agent_config to local_agent_configs
+"""Restore agent config tables and add agent_config column.
+
+9a1f2d0b6c7e dropped local_agent_configs and cloud_agent_configs, but both
+ORM models are still active. This migration recreates them with agent_config
+added to local_agent_configs.
 
 Revision ID: a3b9c0d1e2f3
 Revises: 9a1f2d0b6c7e
@@ -9,8 +13,9 @@ from __future__ import annotations
 
 from typing import Sequence, Union
 
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
 
 
 # revision identifiers, used by Alembic.
@@ -21,11 +26,82 @@ depends_on: Union[str, Sequence[str], None] = None
 
 
 def upgrade() -> None:
-    op.add_column(
-        "local_agent_configs",
-        sa.Column("agent_config", sa.JSON(), nullable=True),
-    )
+    """Recreate the agent config tables and make sure ``agent_config`` exists."""
+    # Recreate enum types (idempotent — they may already exist from migration 003)
+    for enum_name, enum_labels in (
+        ("agent_type", "'local', 'cloud'"),
+        ("agent_run_status", "'running', 'success', 'error', 'partial'"),
+        ("cloud_provider", "'gmail', 'teams', 'outlook'"),
+    ):
+        op.execute(f"""
+            DO $$ BEGIN
+                CREATE TYPE {enum_name} AS ENUM ({enum_labels});
+            EXCEPTION WHEN duplicate_object THEN NULL;
+            END $$;
+        """)
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    existing = set(inspector.get_table_names())
+
+    # ── local_agent_configs (with agent_config column) ────────────────────
+    if "local_agent_configs" not in existing:
+        op.create_table(
+            "local_agent_configs",
+            sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
+            sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
+            sa.Column("device_id", sa.String(255), nullable=False),
+            sa.Column("name", sa.String(255), nullable=False),
+            sa.Column("directory_paths", sa.JSON, nullable=False, server_default="[]"),
+            sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"),
+            sa.Column("prompt_template", sa.Text, nullable=False, server_default=""),
+            sa.Column("agent_config", sa.JSON, nullable=True),
+            sa.Column("file_extensions", sa.JSON, nullable=False, server_default="[]"),
+            sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"),
+            sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()),
+            sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True),
+            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
+            sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
+            sa.PrimaryKeyConstraint("id"),
+            sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        )
+        op.create_index("ix_local_agent_configs_user_id", "local_agent_configs", ["user_id"])
+    else:
+        # Table already present, so it was not built above: add the column
+        # this revision introduces unless it is there already.
+        local_columns = {col["name"] for col in inspector.get_columns("local_agent_configs")}
+        if "agent_config" not in local_columns:
+            op.add_column("local_agent_configs", sa.Column("agent_config", sa.JSON, nullable=True))
+
+    # ── cloud_agent_configs ───────────────────────────────────────────────
+    if "cloud_agent_configs" not in existing:
+        op.create_table(
+            "cloud_agent_configs",
+            sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False),
+            sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False),
+            sa.Column(
+                "provider",
+                postgresql.ENUM("gmail", "teams", "outlook", name="cloud_provider", create_type=False),
+                nullable=False,
+            ),
+            sa.Column("name", sa.String(255), nullable=False),
+            sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"),
+            sa.Column("prompt_template", sa.Text, nullable=False, server_default=""),
+            sa.Column("oauth_token_encrypted", sa.Text, nullable=True),
+            sa.Column("filter_config", sa.JSON, nullable=True),
+            sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"),
+            sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()),
+            sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True),
+            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
+            sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")),
+            sa.PrimaryKeyConstraint("id"),
+            sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        )
+        op.create_index("ix_cloud_agent_configs_user_id", "cloud_agent_configs", ["user_id"])
 
 
 def downgrade() -> None:
-    op.drop_column("local_agent_configs", "agent_config")
+    # Drop both agent config tables, removing each table's user_id index first.
+    for table in ("cloud_agent_configs", "local_agent_configs"):
+        op.drop_index(f"ix_{table}_user_id", table_name=table)
+        op.drop_table(table)
diff --git a/app/config/settings.py b/app/config/settings.py
index 65e8136..823c5d1 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -18,7 +18,6 @@ class Settings(BaseSettings):
     CEREBRAS_API_KEY: str = ""
 
     LLM_MODEL: str = "gpt-4o"
-    LLM_ROUTER_MODEL: str = "gpt-4o-mini"
     LLM_EMBED_MODEL: str = "text-embedding-3-small"
 
     # GitHub Copilot OAuth token storage directory.
@@ -43,7 +42,7 @@ class Settings(BaseSettings):
 
     LANGFUSE_SECRET_KEY: str = ""
     LANGFUSE_PUBLIC_KEY: str = ""
-    LANGFUSE_HOST: str = "https://cloud.langfuse.com"
+    LANGFUSE_BASE_URL: str = "https://cloud.langfuse.com"
 
     ENV: Literal["dev", "prod"] = "dev"
 
diff --git a/app/core/langfuse_client.py b/app/core/langfuse_client.py
index 1a92827..b7f9b37 100644
--- a/app/core/langfuse_client.py
+++ b/app/core/langfuse_client.py
@@ -67,9 +67,9 @@ def get_langfuse() -> Any | None:
         _client = Langfuse(
             secret_key=settings.LANGFUSE_SECRET_KEY,
             public_key=settings.LANGFUSE_PUBLIC_KEY,
-            host=settings.LANGFUSE_HOST,
+            host=settings.LANGFUSE_BASE_URL,
         )
-        logger.info("langfuse: client initialized host=%s", settings.LANGFUSE_HOST)
+        logger.info("langfuse: client initialized host=%s", settings.LANGFUSE_BASE_URL)
     except Exception as exc:
         logger.warning("langfuse: failed to initialize: %s", exc)
         _client = None
diff --git a/app/core/llm.py b/app/core/llm.py
index 3415921..1787ce9 100644
--- a/app/core/llm.py
+++ b/app/core/llm.py
@@ -1,6 +1,6 @@
 """LLM factory — centralised model instantiation via LiteLLM.
 
-Every agent and the orchestrator call ``get_llm()`` or ``get_router_llm()``
+Every agent and the orchestrator call ``get_llm()``
 instead of directly constructing a provider-specific class.  The model string
 follows the `LiteLLM model naming convention
 <https://docs.litellm.ai/docs/providers>`_:
@@ -11,7 +11,7 @@ follows the `LiteLLM model naming convention
 * Ollama:     ``ollama/llama3``
 * Bedrock:    ``bedrock/anthropic.claude-v2``
 
-Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in ``.env``
+Switch providers by changing **LLM_MODEL** in ``.env``
 — no code changes required.
 """
 
@@ -95,14 +95,6 @@ def get_llm(
     )
 
 
-def get_router_llm(
-    *,
-    temperature: float = 0,
-) -> ChatOpenAI | ChatLiteLLM:
-    """Return the lighter model used for intent classification / routing."""
-    return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature)
-
-
 async def embed(text: str) -> list[float]:
     """Return an embedding vector for *text*.