From 41db3a7089e7f42503d66327007b26fb127630f6 Mon Sep 17 00:00:00 2001 From: Roberto Musso Date: Wed, 8 Apr 2026 23:52:52 +0200 Subject: [PATCH] update env variables --- .env.example | 9 +- README.md | 591 ------------------ alembic/versions/003_agent_tables.py | 2 +- ...d1e2f3_add_agent_config_to_local_agents.py | 90 ++- app/config/settings.py | 3 +- app/core/langfuse_client.py | 4 +- app/core/llm.py | 12 +- 7 files changed, 93 insertions(+), 618 deletions(-) diff --git a/.env.example b/.env.example index 576794b..d8d134d 100644 --- a/.env.example +++ b/.env.example @@ -16,8 +16,7 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30 OPENAI_API_KEY= ANTHROPIC_API_KEY= GOOGLE_API_KEY= -LLM_MODEL=gpt-4o -LLM_ROUTER_MODEL=gpt-4o-mini +LLM_MODEL=gpt-5-mini # ── Stripe (leave empty to stub billing) ────────────────────────────────────── STRIPE_SECRET_KEY= @@ -27,9 +26,9 @@ STRIPE_WEBHOOK_SECRET= # ── Langfuse (leave empty to disable observability) ─────────────────────────── LANGFUSE_SECRET_KEY= LANGFUSE_PUBLIC_KEY= -# LANGFUSE_HOST=https://cloud.langfuse.com # EU (default) -# LANGFUSE_HOST=https://us.cloud.langfuse.com # US -# LANGFUSE_HOST=http://localhost:3000 # Self-hosted +# LANGFUSE_BASE_URL=https://cloud.langfuse.com # EU (default) +# LANGFUSE_BASE_URL=https://us.cloud.langfuse.com # US +# LANGFUSE_BASE_URL=http://localhost:3000 # Self-hosted # ── CORS ────────────────────────────────────────────────────────────────────── # Comma-separated list parsed by Settings (override default if needed) diff --git a/README.md b/README.md index 1b6c19a..e69de29 100644 --- a/README.md +++ b/README.md @@ -1,591 +0,0 @@ -# AdiuvAI Cloud API - -**AI-powered project management backend with LLM orchestration and subscription billing.** - -Built with FastAPI · Python 3.12 · PostgreSQL · LangChain · Stripe - ---- - -## Table of Contents - -- [Overview](#overview) -- [Architecture](#architecture) -- [Key Features](#key-features) -- [Tech Stack](#tech-stack) -- [Getting Started](#getting-started) -- [Docker Deployment](#docker-deployment) -- [Environment Variables](#environment-variables) -- [API Reference](#api-reference) -- [Data Model](#data-model) -- [AI Agent System](#ai-agent-system) -- [Orchestration & Execution Plans](#orchestration--execution-plans) -- [Middleware](#middleware) -- [Billing & Tiers](#billing--tiers) -- [Testing](#testing) -- [Project Structure](#project-structure) -- [License](#license) - ---- - -## Overview - -AdiuvAI Cloud API is the FastAPI backend that powers the **AdiuvAI Electron desktop app**. It provides LLM-powered chat orchestration, text embedding generation, and Stripe-based subscription billing across four tiers. - -### Design Principles - -1. **Never expose prompts** — system prompts stay server-side; responses are sanitized to strip any leaked prompt fragments. -2. **Stateless request handling** — all context comes from the client and JWT; no server-side session state. -3. **Tier gates enforced server-side** — the server always reads the current tier from the database, never trusting client-reported values. - ---- - -## Architecture - -``` -┌──────────────┐ ┌────────────────────────────────────────────────────────┐ -│ Electron │ │ FastAPI (Uvicorn / Gunicorn) │ -│ Desktop App │────▶│ │ -│ (Client) │◀────│ Middleware: RateLimit → Sanitizer → CORS → Router │ -└──────────────┘ │ │ - │ ┌──────────────────┐ ┌────────────────────────────┐ │ - │ │ Auth Routes │ │ Chat Routes │ │ - │ │ Billing Routes │ │ ↓ │ │ - │ │ Agent Routes │ │ Orchestrator (GPT-4o-mini)│ │ - │ │ Device WS │ │ ↓ classify intent │ │ - │ └──────────────────┘ │ Agent Registry │ │ - │ │ ↓ │ │ - │ │ TaskAgent | ProjectAgent │ │ - │ │ NoteAgent | CheckptAgent │ │ - │ │ (GPT-4o + LangChain) │ │ - │ └────────────────────────────┘ │ - └────────────────────────────────────────────────────────┘ - │ - ┌────────▼───┐ - │ PostgreSQL │ - │ (Auth, │ - │ Billing, │ - │ Agents) │ - └────────────┘ - │ - ┌────────▼───┐ - │ Stripe │ - │ (Billing) │ - └────────────┘ -``` - ---- - -## Key Features - -1. **LLM-powered orchestration** — GPT-4o-mini classifies user intent and routes to the appropriate domain agent. -2. **4 specialized AI agents** — Tasks (8 tools), Projects (6 tools), Timelines (4 tools), Notes (5 tools), all powered by GPT-4o via LangChain. -3. **Execution plans & playbooks** — Server-side prompt template registry; clients receive only opaque template IDs, never raw prompts. -4. **Text embeddings** — Generates text-embedding-3-small vectors for local client-side note search. -5. **Stripe billing** — Four-tier subscription model (Free / Pro / Power / Team) with checkout sessions and full webhook lifecycle handling. -6. **JWT authentication** — Access + refresh tokens with bcrypt password hashing, SHA-256 token hashing, and automatic rotation. -7. **Prompt IP protection** — Sanitizer middleware strips system prompts, reasoning markers, tool schemas, and agent routing metadata from all chat responses. -8. **Tier-based rate limiting** — Sliding-window per-user limiter scaling from 20 to 200 requests/min by subscription tier. -9. **WebSocket streaming** — Real-time chat with 30-second heartbeat keep-alive and chunked text delivery. -10. **Alembic migrations** — Versioned schema management. -11. **Comprehensive test suite** — In-memory SQLite, per-tier test fixtures, and full API coverage without external dependencies. - ---- - -## Tech Stack - -| Package | Version | Purpose | -|---|---|---| -| `fastapi` | ≥ 0.115.0 | Web framework | -| `uvicorn[standard]` | ≥ 0.34.0 | ASGI development server | -| `gunicorn` | ≥ 22.0.0 | Production process manager | -| `langchain` | ≥ 0.3.0 | LLM orchestration framework | -| `langchain-openai` | ≥ 0.3.0 | OpenAI LLM provider integration | -| `litellm` | ≥ 1.50.0 | Universal LLM gateway (100+ providers) | -| `pydantic` | ≥ 2.10.0 | Data validation and serialization | -| `pydantic-settings` | ≥ 2.7.0 | Environment-based configuration | -| `python-jose[cryptography]` | ≥ 3.3.0 | JWT encoding and decoding | -| `stripe` | ≥ 11.0.0 | Billing and payment integration | -| `slowapi` | ≥ 0.1.9 | Rate limiting utilities | -| `sqlalchemy` | ≥ 2.0.0 | Async ORM and query builder | -| `asyncpg` | ≥ 0.30.0 | PostgreSQL async driver | -| `alembic` | ≥ 1.14.0 | Database migration management | -| `bcrypt` | ≥ 4.2.0 | Password hashing | -| `python-dotenv` | ≥ 1.0.0 | `.env` file loading | -| `httpx` | ≥ 0.28.0 | Async HTTP client (used in tests) | -| `websockets` | ≥ 14.0 | WebSocket protocol support | -| `psycopg2-binary` | ≥ 2.9.0 | Synchronous PostgreSQL driver (Alembic) | -| `pytest` | ≥ 8.0.0 | Test framework | -| `pytest-asyncio` | ≥ 0.24.0 | Async test support | -| `aiosqlite` | ≥ 0.20.0 | In-memory SQLite for tests | -| `ruff` | ≥ 0.8.0 | Linter and formatter | - ---- - -## Getting Started - -### Prerequisites - -- Python 3.12+ -- PostgreSQL 16+ -- An OpenAI API key (for LLM features) -- Stripe API keys (optional — billing stubs gracefully when unconfigured) - -### Installation - -```bash -# Clone the repository -git clone && cd adiuvai-api - -# Create a virtual environment -python -m venv .venv && source .venv/bin/activate - -# Install dependencies -pip install -r requirements.txt - -# Configure environment -cp .env.example .env -# Edit .env with your DATABASE_URL, OPENAI_API_KEY, etc. -``` - -### Database Setup - -```bash -# Start PostgreSQL (or use the Docker Compose database) -docker compose up db -d - -# Run migrations -alembic upgrade head -``` - -### Run the Development Server - -```bash -uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 -``` - -Interactive API docs are available at [http://localhost:8000/docs](http://localhost:8000/docs) in development mode (`ENV=dev`). The `/docs` endpoint is disabled in production. - ---- - -## Docker Deployment - -### Quick Start - -```bash -docker compose up --build -``` - -This starts two services: - -- **app** — FastAPI server on port `8000` -- **db** — PostgreSQL 16 (Alpine) on port `5432` with a persistent volume and health checks - -### Dockerfile Details - -The Dockerfile uses a multi-stage build: - -1. **Builder stage** — Installs Python dependencies into a virtual environment. -2. **Runtime stage** — Copies only the venv, app source, and Alembic migrations. Runs as a non-root user (`appuser`). -3. **Production server** — Gunicorn with 4 Uvicorn workers, 120-second timeout, listening on port 8000. - -```bash -# Production command (run by the container) -gunicorn app.main:app -k uvicorn.workers.UvicornWorker -w 4 --timeout 120 -b 0.0.0.0:8000 -``` - ---- - -## Homelab / Self-Hosted Deployment - -You can run the entire stack locally on a homelab with **no cloud dependencies except the LLM provider**. - -### 1. Start all services - -```bash -docker compose up -d -``` - -This starts PostgreSQL alongside the app. - -### 2. Configure your `.env` - -```bash -# Database (uses the compose PostgreSQL) -DATABASE_URL=postgresql+asyncpg://postgres:postgres@db:5432/adiuvai - -# Billing — leave empty to stub (no Stripe needed) -STRIPE_SECRET_KEY= -STRIPE_WEBHOOK_SECRET= - -# LLM — the only external service -OPENAI_API_KEY=sk-... -LLM_MODEL=gpt-4o -LLM_ROUTER_MODEL=gpt-4o-mini - -# Auth -JWT_SECRET=your-secret-here -ENV=dev -``` - -### 3. Run migrations - -```bash -docker compose exec app alembic upgrade head -``` - -### What runs where - -| Service | Runs on | Port | Notes | -|---|---|---|---| -| FastAPI app | Docker | 8000 | API server | -| PostgreSQL | Docker | 5432 | Auth, billing, agents | -| Stripe | — | — | Stubbed when keys are empty | -| OpenAI / LLM | Cloud | — | Only external dependency | - -> **Want fully offline AI too?** Set `LLM_MODEL=ollama/llama3` and `LLM_ROUTER_MODEL=ollama/llama3`, then add an Ollama container or point at a local Ollama instance. See the [LLM provider switching](#switching-llm-providers) section. - ---- - -## Environment Variables - -All variables are loaded from a `.env` file via Pydantic Settings. Source: `app/config/settings.py` - -| Variable | Type | Default | Description | -|---|---|---|---| -| `DATABASE_URL` | `str` | `postgresql+asyncpg://postgres:postgres@localhost:5432/adiuvai` | Async SQLAlchemy connection string | -| `JWT_SECRET` | `str` | `change-me-in-production` | HMAC secret for JWT signing | -| `JWT_ALGORITHM` | `str` | `HS256` | JWT signing algorithm | -| `JWT_ACCESS_TOKEN_EXPIRE_MINUTES` | `int` | `30` | Access token time-to-live | -| `JWT_REFRESH_TOKEN_EXPIRE_DAYS` | `int` | `30` | Refresh token time-to-live | -| `STRIPE_SECRET_KEY` | `str` | `""` | Stripe API key (empty = stub mode) | -| `STRIPE_WEBHOOK_SECRET` | `str` | `\"\"` | Stripe webhook signature secret |\n| `OPENAI_API_KEY` | `str` | `\"\"` | OpenAI key for LLM agent calls | -| `LLM_MODEL` | `str` | `gpt-4o` | LiteLLM model identifier for agents (e.g. `anthropic/claude-3.5-sonnet`, `gemini/gemini-pro`, `ollama/llama3`) | -| `LLM_ROUTER_MODEL` | `str` | `gpt-4o-mini` | Lighter model used for intent classification / routing | -| `CORS_ORIGINS` | `list[str]` | `["app://.", "http://localhost:3000", "http://localhost:5173"]` | Allowed CORS origins | -| `ENV` | `Literal` | `dev` | `dev` or `prod` — controls `/docs` visibility and SQL echo | - ---- - -## API Reference - -All routes are prefixed with `/api/v1`. **27 endpoints** total (25 REST + 1 WebSocket + 1 health check). - -### Health - -| Method | Path | Auth | Description | -|---|---|---|---| -| `GET` | `/api/v1/health` | No | Returns `{"status": "ok", "version": "0.1.0"}` | - -### Auth - -| Method | Path | Auth | Description | -|---|---|---|---| -| `POST` | `/api/v1/auth/register` | No | Create account with bcrypt-hashed password, returns `AuthTokens` | -| `POST` | `/api/v1/auth/login` | No | Validate credentials, returns `AuthTokens` | -| `POST` | `/api/v1/auth/refresh` | No | Rotate refresh token, returns new `AuthTokens` | -| `GET` | `/api/v1/auth/me` | JWT | Returns `UserProfile` for the authenticated user | - -### Chat - -| Method | Path | Auth | Description | -|---|---|---|---| -| `POST` | `/api/v1/chat` | JWT | Route message through the orchestrator; returns `ChatResponse` or `ExecutionPlan` depending on execution mode | -| `POST` | `/api/v1/chat/embed` | JWT | Generate a 1536-dim text embedding vector (`text-embedding-3-small`). Used by Electron for local note search. | -| `WS` | `/api/v1/chat/stream` | JWT (query param `?token=`) | Streaming chat — first frame is a `ChatRequest`, server yields text chunks, final frame is `{"done": true, "response": "...", "actions": [...]}`. 30-second heartbeat ping. | - -### Plans - -| Method | Path | Auth | Description | -|---|---|---|---| -| `GET` | `/api/v1/plans/playbook` | JWT | List all cached execution plan playbooks | -| `GET` | `/api/v1/plans/playbook/{plan_id}` | JWT | Retrieve a specific playbook by ID | - -### Billing - -| Method | Path | Auth | Description | -|---|---|---|---| -| `POST` | `/api/v1/billing/checkout` | JWT | Create a Stripe checkout session, returns `{"checkout_url": "..."}` | -| `POST` | `/api/v1/billing/webhook` | Stripe signature | Handle Stripe events: `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, `invoice.payment_failed` | -| `GET` | `/api/v1/billing/subscription` | JWT | Get current subscription information | -| `DELETE` | `/api/v1/billing/subscription` | JWT | Cancel subscription and revert to free tier | - ---- - -## Data Model - -3 tables managed by Alembic migrations. Source: `app/models.py` - -### Tables - -| Table | Primary Key | Key Columns | Purpose | -|---|---|---|---| -| `users` | `id` (UUID) | `email` (unique), `password_hash`, `tier`, `stripe_customer_id`, timestamps | User accounts | -| `refresh_tokens` | `id` (UUID) | `user_id` (FK), `token_hash` (SHA-256, unique), `expires_at` | Hashed refresh tokens for rotation | -| `subscriptions` | `id` (UUID) | `user_id` (FK, unique), `stripe_subscription_id`, `tier`, `status`, `current_period_end` | Stripe subscription records | - -### Enum Types - -| Enum | Values | -|---|---| -| `billing_tier` | `free`, `pro`, `power`, `team` | - -### Migrations - -| Version | Description | -|---|---| -| `001_initial_schema` | Creates core auth and billing tables with indexes and foreign key constraints | - ---- - -## AI Agent System - -The agent system uses a registry pattern with LangChain tool-calling agents powered by GPT-4o. Source: `app/agents/`, `app/core/agent_registry.py` - -### Architecture - -- **`BaseAgent`** — Abstract base with `user_id` and `shared_memory`. -- **`ChatAgent(BaseAgent)`** — Abstract `handle(query, context)` and `get_tools()` methods, plus a shared `_tool_loop(llm, messages, tools, max_iter=5)` for iterative tool calling. -- **`AgentRegistry`** — Singleton registry with `@register` decorator, `get(name)`, `list_agents()`, and `call_agent(name, query, context)`. - -### Registered Agents - -| Agent | Registry Name | Tools | Description | -|---|---|---|---| -| **TaskAgent** | `task_agent` | 8 | Full task and comment CRUD. Status: `todo` / `in_progress` / `done`. Priority: `high` / `medium` / `low`. Tools: `list_tasks`, `create_task`, `update_task`, `delete_task`, `list_tasks_due_today`, `list_task_comments`, `add_task_comment`, `delete_task_comment` | -| **ProjectAgent** | `project_agent` | 6 | Project lifecycle management. Status: `active` / `archived`. Prefers archiving over deletion. Tools: `list_projects`, `list_all_projects`, `get_project`, `create_project`, `update_project`, `delete_project` | -| **TimelineAgent** | `timeline_agent` | 4 | Project milestones. Requires `project_id` for creation. Supports AI-suggestion and approval workflows. Tools: `list_timelines`, `create_timeline`, `update_timeline`, `delete_timeline` | -| **NoteAgent** | `note_agent` | 5 | Markdown note management. Optionally linked to projects. Tools: `list_notes`, `get_note`, `create_note`, `update_note`, `delete_note` | - -All agents use the model configured by `LLM_MODEL` (default: GPT-4o) with `temperature=0` via LiteLLM. Tools return JSON action descriptors that the Electron client interprets and applies locally. - -### Switching LLM Providers - -The backend uses **LiteLLM** as a universal LLM gateway. All agents and the orchestrator instantiate models through a centralized factory in `app/core/llm.py`. To switch providers, change environment variables — no code changes required: - -```bash -# OpenAI (default) -LLM_MODEL=gpt-4o -LLM_ROUTER_MODEL=gpt-4o-mini - -# Anthropic -LLM_MODEL=anthropic/claude-3.5-sonnet -LLM_ROUTER_MODEL=anthropic/claude-3-haiku - -# Google Gemini -LLM_MODEL=gemini/gemini-pro -LLM_ROUTER_MODEL=gemini/gemini-flash - -# Local Ollama -LLM_MODEL=ollama/llama3 -LLM_ROUTER_MODEL=ollama/llama3 - -# AWS Bedrock -LLM_MODEL=bedrock/anthropic.claude-v2 -LLM_ROUTER_MODEL=bedrock/anthropic.claude-instant-v1 -``` - -See the [LiteLLM provider docs](https://docs.litellm.ai/docs/providers) for the full list of 100+ supported providers and model naming conventions. - ---- - -## Orchestration & Execution Plans - -Source: `app/core/orchestrator.py`, `app/core/execution_plan.py` - -### Orchestrator - -1. **`classify_intent(message, context, registry)`** — Uses the router model (`LLM_ROUTER_MODEL`, default: GPT-4o-mini) to determine which agent should handle a message. Falls back to `task_agent` when classification is ambiguous. -2. **`route_single(agent_name, message, context)`** — Routes to a single agent and returns a `ChatResponse`. -3. **`route_pipeline(agent_names, message, context)`** — Executes agents sequentially; each receives `previous_results` from earlier agents. A final LLM synthesis step merges all results. -4. **`orchestrate(request)`** — Main entry point. In `direct` mode, returns a `ChatResponse`. In `plan` mode, returns an `ExecutionPlan`. -5. **`orchestrate_stream(request)`** — Streaming variant that yields 50-character text chunks with a final JSON frame. - -### Execution Plans - -- **`PromptTemplateRegistry`** — Maps template IDs to server-side prompt text. Clients only ever see opaque IDs, never raw prompts. -- **`ExecutionPlanBuilder`** — Fluent builder API: `add_step()`, `add_llm_step(template_id, vars)`, `add_data_step(action, data_from_step)`. Validates step references on `build()`. -- **`PlanCache`** — LRU cache (maxsize 1000) for storing plans as reusable playbooks. - -### Built-in Templates (6) - -`tpl_task_agent_default`, `tpl_timeline_agent_default`, `tpl_project_agent_default`, `tpl_note_agent_default`, `tpl_task_extract_from_project`, `tpl_note_weekly_summary` - -### Built-in Playbooks (2) - -| Playbook | Description | -|---|---| -| `create_tasks_from_project` | LLM extracts actionable tasks from project context, then creates task records | -| `generate_weekly_note` | LLM generates a weekly summary, then creates a note record | - ---- - -## Middleware - -Middleware executes in this order on each request: **TierRateLimit → Sanitizer → CORS → Router** - -### JWT Authentication - -Source: `app/api/middleware/auth.py` - -- FastAPI dependency `get_current_user` validates the `Bearer` JWT and extracts `user_id` and `email`. -- **Live tier lookup** — The current tier is fetched from the `subscriptions` table on every request (not cached in the JWT), so upgrades and downgrades take immediate effect. -- Falls back to `free` when no subscription row exists. -- Raises `401 Unauthorized` on invalid or expired tokens. -- **Exempt paths:** `/api/v1/auth/register`, `/api/v1/auth/login`, `/api/v1/billing/webhook` - -### Tier-Based Rate Limiter - -Source: `app/api/middleware/rate_limit.py` - -- `TierRateLimitMiddleware` — Sliding-window in-process rate limiter (no Redis dependency). -- Per-user 60-second window sized by subscription tier: - -| Tier | Requests / Minute | -|---|---| -| Free | 20 | -| Pro | 60 | -| Power | 120 | -| Team | 200 | - -- Returns `429 Too Many Requests` with a `Retry-After` header when the limit is exceeded. -- **Exempt paths:** register, login, webhook, health - -### Response Sanitizer - -Source: `app/api/middleware/sanitizer.py` - -- Runs only on `/api/v1/chat` endpoints. -- Scans JSON response bodies and replaces leaked prompt IP fragments with `[REDACTED]`. -- Detects: system prompt openers, agent routing metadata, LangChain tool schemas, internal reasoning markers (``, `[INST]`), and known prompt fingerprints. -- Logs sanitization events as `WARNING`. - ---- - -## Billing & Tiers - -Source: `app/billing/stripe_service.py`, `app/billing/tier_manager.py` - -### Feature Matrix - -| Feature | Free | Pro | Power | Team | -|---|---|---|---|---| -| AI Agents | 3 | Unlimited | Unlimited | Unlimited | -| Batch Active | 2 | 10 | Unlimited | Unlimited | -| LLM Providers | 1 | Unlimited | Unlimited | Unlimited | -| Batch Builder | — | — | ✓ | ✓ | -| SSO | — | — | — | ✓ | -| Rate Limit | 20 req/min | 60 req/min | 120 req/min | 200 req/min | - -### Stripe Integration - -- **Checkout** — `create_checkout_session(user_id, tier)` creates a Stripe Checkout session. Returns a stub URL when Stripe is not configured. -- **Webhooks** — Handles `checkout.session.completed`, `customer.subscription.updated`, `customer.subscription.deleted`, and `invoice.payment_failed`. -- **Subscription management** — `get_subscription()` returns the current subscription record; `cancel_subscription()` cancels via the Stripe API and reverts the user to the free tier. -- **Price IDs:** `price_pro_monthly`, `price_power_monthly`, `price_team_monthly` - -### Tier Manager - -- `get_tier(user_id)` — Returns the user's current billing tier. -- `check_feature(tier, feature)` — Boolean feature gate check. -- `require_feature(tier, feature)` — Raises HTTP 403 if the feature is not available. - ---- - -## Testing - -### Running Tests - -```bash -# Run all tests -pytest - -# Run a specific test file -pytest tests/test_auth.py - -# Run with verbose output -pytest -v -``` - -### Test Infrastructure - -- **Database:** Async SQLite in-memory via `aiosqlite` + `StaticPool` — fast, no PostgreSQL needed. -- **Auth helpers:** `make_jwt(tier)` and `auth_header(tier)` generate per-tier test tokens. -- **Seed data:** Auto-creates one `User` + `Subscription` per tier (free/pro/power/team) before each test. -- **FK enforcement:** SQLite `PRAGMA foreign_keys=ON`. -- **No external dependencies** — all tests run fully offline. - -### Test Coverage - -| File | Coverage | -|---|---| -| `test_auth.py` | Register, login, token access, refresh, expiration | -| `test_middleware.py` | Rate limiting by tier, sanitizer prompt leak detection | - ---- - -## Project Structure - -``` -adiuvai-api/ -├── alembic.ini # Alembic configuration -├── docker-compose.yml # Docker Compose (app + PostgreSQL) -├── Dockerfile # Multi-stage production build -├── requirements.txt # Python dependencies -│ -├── alembic/ # Database migrations -│ ├── env.py # Alembic environment config -│ ├── script.py.mako # Migration template -│ └── versions/ -│ └── 001_initial_schema.py # Tables, indexes, FKs -│ -├── app/ # Application source -│ ├── main.py # FastAPI app factory, middleware, routes -│ ├── db.py # Async SQLAlchemy engine & session -│ ├── models.py # SQLAlchemy ORM models -│ ├── schemas.py # Pydantic request/response schemas -│ │ -│ ├── config/ -│ │ └── settings.py # Pydantic Settings (env vars) -│ │ -│ ├── agents/ # LLM-powered domain agents -│ │ ├── task_agent.py # Task & comment CRUD (8 tools) -│ │ ├── project_agent.py # Project lifecycle (6 tools) -│ │ ├── timeline_agent.py # Milestones (4 tools) -│ │ └── note_agent.py # Markdown notes (5 tools) -│ │ -│ ├── core/ # Orchestration engine -│ │ ├── agent_registry.py # BaseAgent, ChatAgent, AgentRegistry -│ │ ├── llm.py # LiteLLM factory (get_llm, get_router_llm) -│ │ └── deep_agent.py # Deep agent orchestration -│ │ -│ ├── api/ # HTTP layer -│ │ ├── deps.py # Shared FastAPI dependencies -│ │ ├── middleware/ -│ │ │ ├── rate_limit.py # Sliding-window tier rate limiter -│ │ │ └── sanitizer.py # Prompt IP leak protection -│ │ └── routes/ -│ │ ├── auth.py # Register, login, refresh, me -│ │ ├── chat.py # Chat + embed endpoint -│ │ ├── billing.py # Stripe checkout, webhooks, subscription -│ │ ├── agents.py # Agent catalog, config, runs -│ │ └── device_ws.py # Persistent device WebSocket -│ │ -│ └── billing/ -│ ├── stripe_service.py # Stripe API wrapper -│ └── tier_manager.py # Feature matrix, rate limits -│ -└── tests/ # Test suite - ├── conftest.py # Fixtures: DB, auth, seeds - ├── test_auth.py - ├── test_orchestrator.py - ├── test_agents.py - ├── test_agent_registry.py - ├── test_execution_plan.py - └── test_middleware.py -``` - ---- - -## License - -*To be determined.* diff --git a/alembic/versions/003_agent_tables.py b/alembic/versions/003_agent_tables.py index 1e503c8..455f03b 100644 --- a/alembic/versions/003_agent_tables.py +++ b/alembic/versions/003_agent_tables.py @@ -14,7 +14,7 @@ from alembic import op from sqlalchemy.dialects import postgresql revision: str = "003" -down_revision: Union[str, None] = "002" +down_revision: Union[str, None] = "001" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None diff --git a/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py b/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py index f56b18e..60a9b96 100644 --- a/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py +++ b/alembic/versions/a3b9c0d1e2f3_add_agent_config_to_local_agents.py @@ -1,4 +1,8 @@ -"""add agent_config to local_agent_configs +"""Restore agent config tables and add agent_config column. + +9a1f2d0b6c7e dropped local_agent_configs and cloud_agent_configs, but both +ORM models are still active. This migration recreates them with agent_config +added to local_agent_configs. Revision ID: a3b9c0d1e2f3 Revises: 9a1f2d0b6c7e @@ -9,8 +13,9 @@ from __future__ import annotations from typing import Sequence, Union -from alembic import op import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. @@ -21,11 +26,82 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: - op.add_column( - "local_agent_configs", - sa.Column("agent_config", sa.JSON(), nullable=True), - ) + # Recreate enum types (idempotent — they may already exist from migration 003) + op.execute(""" + DO $$ BEGIN + CREATE TYPE agent_type AS ENUM ('local', 'cloud'); + EXCEPTION WHEN duplicate_object THEN NULL; + END $$; + """) + op.execute(""" + DO $$ BEGIN + CREATE TYPE agent_run_status AS ENUM ('running', 'success', 'error', 'partial'); + EXCEPTION WHEN duplicate_object THEN NULL; + END $$; + """) + op.execute(""" + DO $$ BEGIN + CREATE TYPE cloud_provider AS ENUM ('gmail', 'teams', 'outlook'); + EXCEPTION WHEN duplicate_object THEN NULL; + END $$; + """) + + bind = op.get_bind() + inspector = sa.inspect(bind) + existing = set(inspector.get_table_names()) + + # ── local_agent_configs (with agent_config column) ──────────────────── + if "local_agent_configs" not in existing: + op.create_table( + "local_agent_configs", + sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False), + sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False), + sa.Column("device_id", sa.String(255), nullable=False), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("directory_paths", sa.JSON, nullable=False, server_default="[]"), + sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"), + sa.Column("prompt_template", sa.Text, nullable=False, server_default=""), + sa.Column("agent_config", sa.JSON, nullable=True), + sa.Column("file_extensions", sa.JSON, nullable=False, server_default="[]"), + sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"), + sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()), + sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")), + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + ) + op.create_index("ix_local_agent_configs_user_id", "local_agent_configs", ["user_id"]) + + # ── cloud_agent_configs ─────────────────────────────────────────────── + if "cloud_agent_configs" not in existing: + op.create_table( + "cloud_agent_configs", + sa.Column("id", postgresql.UUID(as_uuid=False), nullable=False), + sa.Column("user_id", postgresql.UUID(as_uuid=False), nullable=False), + sa.Column( + "provider", + postgresql.ENUM("gmail", "teams", "outlook", name="cloud_provider", create_type=False), + nullable=False, + ), + sa.Column("name", sa.String(255), nullable=False), + sa.Column("data_types", sa.JSON, nullable=False, server_default="[]"), + sa.Column("prompt_template", sa.Text, nullable=False, server_default=""), + sa.Column("oauth_token_encrypted", sa.Text, nullable=True), + sa.Column("filter_config", sa.JSON, nullable=True), + sa.Column("schedule_cron", sa.String(100), nullable=False, server_default="0 */6 * * *"), + sa.Column("enabled", sa.Boolean, nullable=False, server_default=sa.true()), + sa.Column("last_run_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.text("now()")), + sa.PrimaryKeyConstraint("id"), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + ) + op.create_index("ix_cloud_agent_configs_user_id", "cloud_agent_configs", ["user_id"]) def downgrade() -> None: - op.drop_column("local_agent_configs", "agent_config") + op.drop_index("ix_cloud_agent_configs_user_id", table_name="cloud_agent_configs") + op.drop_table("cloud_agent_configs") + op.drop_index("ix_local_agent_configs_user_id", table_name="local_agent_configs") + op.drop_table("local_agent_configs") diff --git a/app/config/settings.py b/app/config/settings.py index 65e8136..823c5d1 100644 --- a/app/config/settings.py +++ b/app/config/settings.py @@ -18,7 +18,6 @@ class Settings(BaseSettings): CEREBRAS_API_KEY: str = "" LLM_MODEL: str = "gpt-4o" - LLM_ROUTER_MODEL: str = "gpt-4o-mini" LLM_EMBED_MODEL: str = "text-embedding-3-small" # GitHub Copilot OAuth token storage directory. @@ -43,7 +42,7 @@ class Settings(BaseSettings): LANGFUSE_SECRET_KEY: str = "" LANGFUSE_PUBLIC_KEY: str = "" - LANGFUSE_HOST: str = "https://cloud.langfuse.com" + LANGFUSE_BASE_URL: str = "https://cloud.langfuse.com" ENV: Literal["dev", "prod"] = "dev" diff --git a/app/core/langfuse_client.py b/app/core/langfuse_client.py index 1a92827..b7f9b37 100644 --- a/app/core/langfuse_client.py +++ b/app/core/langfuse_client.py @@ -67,9 +67,9 @@ def get_langfuse() -> Any | None: _client = Langfuse( secret_key=settings.LANGFUSE_SECRET_KEY, public_key=settings.LANGFUSE_PUBLIC_KEY, - host=settings.LANGFUSE_HOST, + host=settings.LANGFUSE_BASE_URL, ) - logger.info("langfuse: client initialized host=%s", settings.LANGFUSE_HOST) + logger.info("langfuse: client initialized host=%s", settings.LANGFUSE_BASE_URL) except Exception as exc: logger.warning("langfuse: failed to initialize: %s", exc) _client = None diff --git a/app/core/llm.py b/app/core/llm.py index 3415921..1787ce9 100644 --- a/app/core/llm.py +++ b/app/core/llm.py @@ -1,6 +1,6 @@ """LLM factory — centralised model instantiation via LiteLLM. -Every agent and the orchestrator call ``get_llm()`` or ``get_router_llm()`` +Every agent and the orchestrator call ``get_llm()`` instead of directly constructing a provider-specific class. The model string follows the `LiteLLM model naming convention `_: @@ -11,7 +11,7 @@ follows the `LiteLLM model naming convention * Ollama: ``ollama/llama3`` * Bedrock: ``bedrock/anthropic.claude-v2`` -Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in ``.env`` +Switch providers by changing **LLM_MODEL** in ``.env`` — no code changes required. """ @@ -95,14 +95,6 @@ def get_llm( ) -def get_router_llm( - *, - temperature: float = 0, -) -> ChatOpenAI | ChatLiteLLM: - """Return the lighter model used for intent classification / routing.""" - return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature) - - async def embed(text: str) -> list[float]: """Return an embedding vector for *text*.