feat(i18n): inject user language into AI agent system prompts

- Add _language_instruction() to deep_agent.py; it reads the user's language from core memory
- Append the language directive to the system prompt in all 4 run_* functions (run_home, run_floating, run_home_stream, run_floating_stream)
- Minor fixes: alembic env, route imports, test cleanup
2026-04-12 00:35:23 +02:00
parent 4073863dc6
commit 7ccdad431f
13 changed files with 41 additions and 18 deletions
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -16,7 +16,7 @@ import re
 from logging.config import fileConfig

 from alembic import context
-from sqlalchemy import engine_from_config, pool
+from sqlalchemy import pool
 from sqlalchemy.ext.asyncio import create_async_engine

 # Alembic Config object (gives access to alembic.ini values).
--- a/app/api/routes/agents.py
+++ b/app/api/routes/agents.py
@@ -14,7 +14,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import uuid
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone

 logger = logging.getLogger(__name__)

--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -30,7 +30,6 @@ import asyncio
 import json
 import logging
 import os
-import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any

--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
@@ -27,6 +27,34 @@ logger = logging.getLogger(__name__)
 FloatingDomainType = Literal["task", "timeline", "project", "node"]
 FloatingDomainSection = Literal["task", "timeline", "note"]

+# Lookup from a core-memory language value (ISO 639-1 code, English name or
+# native name, all lower-case) to the language name used in prompts.
+_LANGUAGE_NAMES: dict[str, str] = {
+    "en": "English", "english": "English",
+    "it": "Italian", "italian": "Italian", "italiano": "Italian",
+    "es": "Spanish", "spanish": "Spanish", "español": "Spanish",
+    "fr": "French", "french": "French", "français": "French",
+    "de": "German", "german": "German", "deutsch": "German",
+}
+
+
+def _language_instruction(context: dict[str, Any]) -> str:
+    """Return a system-prompt suffix telling the LLM to answer in the user's language.
+
+    Locale tags (``en-US``, ``it_IT``) fall back to their primary subtag. Returns ""
+    when the language is unset, not a string, English, or not a plain name.
+    """
+    core = context.get("core_memory")
+    raw = core.get("language") if isinstance(core, dict) else None
+    raw = raw.strip().lower().replace("_", "-") if isinstance(raw, str) else ""
+    primary = raw.partition("-")[0]  # "en-us" -> "en"
+    lang = _LANGUAGE_NAMES.get(raw) or _LANGUAGE_NAMES.get(primary) or raw.title()
+    # Letters/spaces/hyphens only: keeps arbitrary memory text out of the prompt.
+    if lang == "English" or not lang.replace(" ", "").replace("-", "").isalpha():
+        return ""
+    text = f"Always respond in {lang}. All your output text must be written in {lang}."
+    return f"\n\nIMPORTANT: {text}"
+
 _HOME_SYSTEM_PROMPT = (
    "You are the home assistant with direct access to all tools: tasks, projects, notes, timelines, and memory tools. "
    "Always use tools for factual data retrieval before answering. "
@@ -876,6 +904,7 @@ async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
    system_prompt, langfuse_prompt = get_prompt_or_fallback(
        "home_system", _HOME_SYSTEM_PROMPT
    )
+    system_prompt += _language_instruction(context)
    response = await _run_single_agent(
        user_id=user_id,
        system_prompt=system_prompt,
@@ -893,6 +922,7 @@ async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> t
    system_prompt, langfuse_prompt = get_prompt_or_fallback(
        "floating_system", _FLOATING_SYSTEM_PROMPT
    )
+    system_prompt += _language_instruction(context)
    response = await _run_single_agent(
        user_id=user_id,
        system_prompt=system_prompt,
@@ -916,6 +946,7 @@ async def run_home_stream(
    system_prompt, langfuse_prompt = get_prompt_or_fallback(
        "home_system", _HOME_SYSTEM_PROMPT
    )
+    system_prompt += _language_instruction(context)
    text_chunks: list[str] = []
    async for event in _run_single_agent_stream(
        user_id=user_id,
@@ -948,6 +979,7 @@ async def run_floating_stream(
    system_prompt, langfuse_prompt = get_prompt_or_fallback(
        "floating_system", _FLOATING_SYSTEM_PROMPT
    )
+    system_prompt += _language_instruction(context)
    sanitizer = _FloatingStreamSanitizer()
    emitted_sanitized = False
    raw_chunks: list[str] = []
--- a/app/integrations/ms_graph.py
+++ b/app/integrations/ms_graph.py
@@ -25,7 +25,7 @@ from __future__ import annotations

 import logging
 import re
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone
 from typing import Any

 import httpx
--- a/tests/test_agent_runner.py
+++ b/tests/test_agent_runner.py
@@ -28,7 +28,6 @@ from datetime import datetime, timezone
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-import pytest_asyncio

 from app.core.agent_runner import (
    _extract_items_from_content,
@@ -597,7 +596,7 @@ async def test_run_cloud_agent_provider_fetch_error():
@pytest.mark.asyncio
 async def test_run_cloud_agent_refreshed_token_persisted():
    """When the provider refreshes its token, the new ciphertext is written to DB."""
-    from app.integrations import EmailMessage, encrypt_token
+    from app.integrations import encrypt_token
    from cryptography.fernet import Fernet as _Fernet

    fernet_key = _Fernet.generate_key().decode()
--- a/tests/test_agent_runner_v2.py
+++ b/tests/test_agent_runner_v2.py
@@ -40,7 +40,6 @@ from app.core.agent_runner import (
    _format_projects,
    _get_extraction_rules,
    _get_no_match_behavior,
-    _is_overdue,
    run_local_agent,
 )
 from app.core.device_manager import DeviceConnectionManager
--- a/tests/test_agent_setup.py
+++ b/tests/test_agent_setup.py
@@ -21,7 +21,6 @@ import time
 import uuid
 from unittest.mock import AsyncMock, patch

-import pytest
 from fastapi.testclient import TestClient
 from sqlalchemy.ext.asyncio import AsyncSession

--- a/tests/test_device_ws.py
+++ b/tests/test_device_ws.py
@@ -18,13 +18,12 @@ from datetime import datetime, timezone
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-import pytest_asyncio

-from app.core.device_manager import DeviceConnection, DeviceConnectionManager
+from app.core.device_manager import DeviceConnectionManager
 from app.db import get_session
 from app.main import app
 from app.models import AgentRunLog
-from tests.conftest import TEST_USER_IDS, auth_header, make_jwt
+from tests.conftest import TEST_USER_IDS, make_jwt

 # ---------------------------------------------------------------------------
 # Helpers
--- a/tests/test_integrations.py
+++ b/tests/test_integrations.py
@@ -40,11 +40,9 @@ Coverage:

 from __future__ import annotations

-import asyncio
 import json
-import uuid
 from datetime import datetime, timezone
-from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
+from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

 import pytest

--- a/tests/test_memory_middleware.py
+++ b/tests/test_memory_middleware.py
@@ -19,7 +19,7 @@ import pytest_asyncio
 from cryptography.fernet import Fernet
 from sqlalchemy import select

-from app.core.memory_middleware import MemoryMiddleware, _PROACTIVE_CONFIDENCE_THRESHOLD
+from app.core.memory_middleware import MemoryMiddleware
 from app.db import get_session
 from app.main import app
 from app.models import (
--- a/tests/test_memory_models.py
+++ b/tests/test_memory_models.py
@@ -7,10 +7,9 @@ column is stored as JSON in tests (SQLite-compatible).
 from __future__ import annotations

 import uuid
-from datetime import datetime, timezone
+from datetime import datetime

 import pytest
-import pytest_asyncio
 from cryptography.fernet import Fernet
 from sqlalchemy import select

--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -12,7 +12,6 @@ from __future__ import annotations
 import re
 from pathlib import Path

-import pytest
 import yaml

 from app.core.preprocessors import detect_content_type, preprocess