diff --git a/.gitignore b/.gitignore
index 8e6f860..a098c93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,6 @@ Thumbs.db
 # Claude Code
 .claude/
 logs/
+
+# Eval private test data
+services/batch-agent/eval/fixtures/private_data/
diff --git a/app/config/settings.py b/app/config/settings.py
index 03c9906..e566969 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -27,6 +27,7 @@ class Settings(BaseSettings):
     ANTHROPIC_API_KEY: str = ""
     GOOGLE_API_KEY: str = ""
     CEREBRAS_API_KEY: str = ""
+    GITHUB_TOKEN: str = ""
 
     LLM_MODEL: str = "gpt-4o"
     LLM_EMBED_MODEL: str = "text-embedding-3-small"
diff --git a/app/core/llm.py b/app/core/llm.py
index 1787ce9..cfd9d84 100644
--- a/app/core/llm.py
+++ b/app/core/llm.py
@@ -50,6 +50,8 @@ def _api_key_for_model(model: str) -> str | None:
         return settings.GOOGLE_API_KEY or None
     if model.startswith("cerebras/"):
         return settings.CEREBRAS_API_KEY or None
+    if model.startswith("github/"):
+        return settings.GITHUB_TOKEN or None
     if model.startswith("github_copilot/"):
         # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
         # No API key is required; returning None lets LiteLLM handle auth.
@@ -83,6 +85,9 @@ def get_llm(
     if settings.GITHUB_COPILOT_TOKEN_DIR:
         os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
 
+    if settings.GITHUB_TOKEN:
+        os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
+
     # Use ChatLiteLLM for provider-prefixed models (github_copilot/, anthropic/, etc.)
     # so LiteLLM handles routing and auth. ChatOpenAI for plain OpenAI model names.
     if "/" in model:
diff --git a/services/batch-agent/app/journey.py b/services/batch-agent/app/journey.py
index 9dcafeb..fe6d8d0 100644
--- a/services/batch-agent/app/journey.py
+++ b/services/batch-agent/app/journey.py
@@ -80,17 +80,9 @@ def get_journey_session(session_id: str, user_id: str) -> JourneySession | None:
 _SYSTEM_PROMPT_TEMPLATE = """\
 You are a friendly assistant helping a freelancer configure a data-extraction agent.
 Your job is to understand exactly what data the user wants to extract from their
-local directory and produce a detailed prompt_template that a separate AI will use
+local directory and produce a concise prompt_template that a separate AI will use
 as its instruction set.
 
-The extraction agent already has this base behaviour built in:
-  - Reads each file using file-system tools.
-  - Creates records (tasks, notes, timelines, projects) via CRUD tools.
-  - Sets isAiSuggested=1 on every new record.
-  - Only extracts data explicitly present in the files — it never invents information.
-The user's custom prompt is appended AFTER this base behaviour, so focus on
-what to look for and how to map it — not on the general extraction mechanics.
-
 You have access to file-system tools to explore the user's directory:
 - list_directory: to see folder structure
 - read_file_content: to peek at file contents
@@ -99,38 +91,43 @@ You have access to file-system tools to explore the user's directory:
 The user's configured directory is: {directory}
 Target data types: {data_types}
 
-IMPORTANT — project assignment is handled automatically by the main agent runner
-before the custom prompt is ever used.  You MUST NOT ask the user about projects,
-projectId, or how to link records to projects.  Never include projectId logic or
-project creation instructions in the generated prompt_template.
+IMPORTANT — project assignment is handled automatically.  You MUST NOT ask the user
+about projects, projectId, or how to link records to projects.  Never include
+projectId logic or project creation instructions in the generated prompt_template.
 
 Start by exploring the directory to understand its structure.  Then ask concise,
-focused questions one at a time.  Cover these topics (not necessarily in this order):
-  1. The type and format of the source content (confirmed by your exploration).
-  2. How fields should be mapped (e.g. filename → task title).
-  3. Priority or status rules (e.g. "urgent" keyword → high priority).
-  4. Any special handling, date extraction, or exclusions.
+focused questions one at a time.  Cover only the topics relevant to the target
+data types listed above:
 
-Once you reach 90% confidence, output the final prompt_template between these exact
-markers on their own lines:
+  1. Content type and format — confirmed by your exploration.
+  2. For TASKS (if in scope): field mapping for title, status, priority, content,
+       dueDate (where is the date found? what's the fallback when absent?),
+       and assignee (is there a person name to assign?).
+  3. For NOTES when TASKS are also in scope: note vs task distinction —
+       what makes something a note rather than a task?
+  4. For TIMELINES (if in scope): the date source — what marks a milestone or event?
+  5. Exclusions and special handling applicable to the target data types.
+
+Keep asking focused questions until you are at least 90% confident.  Then stop and
+output the final prompt_template immediately, wrapped between these exact markers
+on their own lines:
 
 {template_start}
 <the complete extraction prompt here>
 {template_end}
 
-The prompt_template must be a self-contained instruction for an AI that reads files
-and must perform CRUD operations using tools to create records.  It should specify:
-  - What entity types to create (tasks, notes, timelines) — never projects.
-  - How to map file content to record fields (camelCase: title, status, priority,
-    dueDate, content, etc.) — never include projectId.
-  - That isAiSuggested must be set to 1 on every new record.
-  - Concrete examples of mappings based on what you discovered in the directory.
+The prompt_template must be concise (bullet points, ~15–25 lines maximum).
+Specify only:
+  - Scope: what files/content qualify and what entity types to create.
+  - Field mapping rules per entity type (camelCase fields: title, status, priority,
+    dueDate, content, assignee, etc.).
+  - dueDate rule (if tasks in scope): source and fallback behaviour.
+  - Note vs task rule (if both in scope): the criterion that separates them.
+  - Timeline date rule (if timelines in scope): what constitutes a timeline event.
+  - Exclusion/filtering rules.
+  - 2–3 concrete mapping examples based on what you discovered.
 
-{existing_section}\
-Keep asking clarifying questions until you are at least 90% confident you have
-enough information to generate an accurate prompt_template.  Once you reach that
-confidence level, stop asking and produce the final template immediately.
-Begin by exploring the directory, then ask your first question.\
+{existing_section}Begin by exploring the directory, then ask your first question.\
 """
 
 
@@ -152,8 +149,10 @@ def _build_system_prompt(
         variables={
             "directory": directory,
             "data_types": ", ".join(data_types),
+            # _SYSTEM_PROMPT_TEMPLATE still contains {template_start} and
+            # {template_end}, so both values must keep being supplied.
             "template_start": _TEMPLATE_START,
             "template_end": _TEMPLATE_END,
             "existing_section": existing_section,
         },
     )
diff --git a/services/batch-agent/app/llm.py b/services/batch-agent/app/llm.py
new file mode 100644
index 0000000..603e376
--- /dev/null
+++ b/services/batch-agent/app/llm.py
@@ -0,0 +1,99 @@
+"""LLM factory — centralised model instantiation via LiteLLM.
+
+Counterpart of services/chat/app/llm.py — keep the two in sync.
+Uses shared.config.settings.
+"""
+
+from __future__ import annotations
+
+import os
+import warnings
+
+from openai import AsyncOpenAI
+import litellm
+
+from langchain_openai import ChatOpenAI
+from langchain_litellm import ChatLiteLLM
+
+from shared.config import settings
+
+litellm.drop_params = True
+
+warnings.filterwarnings(
+    "ignore",
+    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
+    category=UserWarning,
+)
+
+
+def _api_key_for_model(model: str) -> str | None:
+    """Return the configured API key for the provider named by *model*'s prefix.
+
+    Empty settings values are normalised to ``None``.  ``github_copilot/``
+    models always yield ``None``; names without a known prefix fall back to
+    the OpenAI key.
+    """
+    if model.startswith("anthropic/"):
+        return settings.ANTHROPIC_API_KEY or None
+    if model.startswith(("gemini/", "google/")):
+        return settings.GOOGLE_API_KEY or None
+    if model.startswith("cerebras/"):
+        return settings.CEREBRAS_API_KEY or None
+    if model.startswith("github/"):
+        return settings.GITHUB_TOKEN or None
+    if model.startswith("github_copilot/"):
+        return None
+    return settings.OPENAI_API_KEY or None
+
+
+def get_llm(
+    *,
+    model: str | None = None,
+    temperature: float = 0,
+    callbacks: list | None = None,
+) -> ChatOpenAI | ChatLiteLLM:
+    """Build a chat model for *model*, defaulting to ``settings.LLM_MODEL``.
+
+    Provider-prefixed names (anything containing ``/``) are returned as
+    ``ChatLiteLLM``; plain names are returned as ``ChatOpenAI``.  Configured
+    GitHub credentials are exported to the process environment first, without
+    overriding values that are already set.
+    """
+    model = model or settings.LLM_MODEL
+
+    if settings.GITHUB_COPILOT_TOKEN_DIR:
+        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
+
+    if settings.GITHUB_TOKEN:
+        os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
+        # ChatLiteLLM is built without an explicit api_key below, and LiteLLM's
+        # `github/` provider looks the key up under GITHUB_API_KEY.
+        os.environ.setdefault("GITHUB_API_KEY", settings.GITHUB_TOKEN)
+
+    if "/" in model:
+        return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)
+
+    return ChatOpenAI(
+        model=model,
+        temperature=temperature,
+        api_key=_api_key_for_model(model),
+        callbacks=callbacks,
+    )
+
+
+async def embed(text: str) -> list[float]:
+    """Return the embedding vector of *text* using ``settings.LLM_EMBED_MODEL``.
+
+    Provider-prefixed model names go through LiteLLM; plain names use the
+    OpenAI client with ``settings.OPENAI_API_KEY``.
+    """
+    model = settings.LLM_EMBED_MODEL
+
+    # Any provider-prefixed name (including "github_copilot/...") contains "/".
+    if "/" in model:
+        response = await litellm.aembedding(model=model, input=[text])
+        return response.data[0]["embedding"]
+
+    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
+    response = await client.embeddings.create(model=model, input=text)
+    return response.data[0].embedding
diff --git a/services/batch-agent/app/redis_consumer.py b/services/batch-agent/app/redis_consumer.py
index 8adb02f..cb2d8a3 100644
--- a/services/batch-agent/app/redis_consumer.py
+++ b/services/batch-agent/app/redis_consumer.py
@@ -138,6 +138,8 @@ async def _dispatch(user_id: str, message_data: dict[str, Any]) -> None:
         await _handle_journey_message(user_id, message_data)
     elif msg_type == "agent_trigger":
         await _handle_agent_trigger(user_id, message_data)
+    elif msg_type == "device_online":
+        logger.info("batch-agent: device_online user=%s device=%s", user_id, message_data.get("device_id", "?"))
     else:
         logger.warning("batch-agent: unknown message type %r from user=%s", msg_type, user_id)
 
diff --git a/services/batch-agent/app/tracing.py b/services/batch-agent/app/tracing.py
index 430edf1..75a0dbb 100644
--- a/services/batch-agent/app/tracing.py
+++ b/services/batch-agent/app/tracing.py
@@ -232,6 +232,38 @@ def compile_prompt(
         return fallback.format(**variables)
 
 
+def get_prompt_object(
+    name: str,
+    *,
+    version: int | None = None,
+    label: str | None = None,
+    cache_ttl_seconds: int = 300,
+) -> Any | None:
+    """Look up a managed prompt in Langfuse and return the prompt object itself.
+
+    Unlike a compiled prompt string, the returned object can be handed to
+    ``start_observation(prompt=...)`` so the trace is linked to the prompt.
+
+    ``version`` and ``label`` are forwarded only when they are not ``None``.
+    Returns ``None`` if no Langfuse client is available or the lookup raises.
+    """
+    client = _get_client()
+    if client is None:
+        return None
+
+    # Optional selectors are left out entirely rather than passed as None.
+    selectors = {"version": version, "label": label}
+    lookup: dict[str, Any] = {"name": name, "cache_ttl_seconds": cache_ttl_seconds}
+    lookup.update({key: val for key, val in selectors.items() if val is not None})
+
+    try:
+        return client.get_prompt(**lookup)
+    except Exception as exc:
+        # A failed lookup is logged and reported as None instead of raising.
+        logger.warning("tracing: get_prompt_object(%s) failed: %s", name, exc)
+        return None
+
+
 def link_prompt_to_trace(
     span: Any,
     prompt_name: str,
@@ -239,19 +271,19 @@ def link_prompt_to_trace(
     version: int | None = None,
     label: str | None = None,
 ) -> None:
-    """Attach prompt metadata to a span/trace."""
+    """Link a Langfuse managed prompt to a span/observation.
+
+    Uses the SDK v4 ``prompt=`` parameter so that the prompt version
+    appears linked in the Langfuse UI with metrics tracking.
+    """
     lf = _get_client()
     if lf is None or isinstance(span, _NullSpan):
         return
 
     try:
-        kwargs: dict[str, Any] = {"name": prompt_name}
-        if version is not None:
-            kwargs["version"] = version
-        if label is not None:
-            kwargs["label"] = label
-        prompt = lf.get_prompt(**kwargs)
-        span.update(metadata={"prompt": {"name": prompt_name, "version": prompt.version}})
+        prompt = get_prompt_object(prompt_name, version=version, label=label)
+        if prompt is not None:
+            span.update(prompt=prompt)
     except Exception as exc:
         logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
 
diff --git a/services/chat/app/llm.py b/services/chat/app/llm.py
new file mode 100644
index 0000000..b979362
--- /dev/null
+++ b/services/chat/app/llm.py
@@ -0,0 +1,99 @@
+"""LLM factory — centralised model instantiation via LiteLLM.
+
+Adapted from app/core/llm.py for the Chat Service.
+Uses shared.config.settings instead of app.config.settings.
+"""
+
+from __future__ import annotations
+
+import os
+import warnings
+
+from openai import AsyncOpenAI
+import litellm
+
+from langchain_openai import ChatOpenAI
+from langchain_litellm import ChatLiteLLM
+
+from shared.config import settings
+
+litellm.drop_params = True
+
+warnings.filterwarnings(
+    "ignore",
+    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
+    category=UserWarning,
+)
+
+
+def _api_key_for_model(model: str) -> str | None:
+    """Return the configured API key for the provider named by *model*'s prefix.
+
+    Empty settings values are normalised to ``None``.  ``github_copilot/``
+    models always yield ``None``; names without a known prefix fall back to
+    the OpenAI key.
+    """
+    if model.startswith("anthropic/"):
+        return settings.ANTHROPIC_API_KEY or None
+    if model.startswith(("gemini/", "google/")):
+        return settings.GOOGLE_API_KEY or None
+    if model.startswith("cerebras/"):
+        return settings.CEREBRAS_API_KEY or None
+    if model.startswith("github/"):
+        return settings.GITHUB_TOKEN or None
+    if model.startswith("github_copilot/"):
+        return None
+    return settings.OPENAI_API_KEY or None
+
+
+def get_llm(
+    *,
+    model: str | None = None,
+    temperature: float = 0,
+    callbacks: list | None = None,
+) -> ChatOpenAI | ChatLiteLLM:
+    """Build a chat model for *model*, defaulting to ``settings.LLM_MODEL``.
+
+    Provider-prefixed names (anything containing ``/``) are returned as
+    ``ChatLiteLLM``; plain names are returned as ``ChatOpenAI``.  Configured
+    GitHub credentials are exported to the process environment first, without
+    overriding values that are already set.
+    """
+    model = model or settings.LLM_MODEL
+
+    if settings.GITHUB_COPILOT_TOKEN_DIR:
+        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
+
+    if settings.GITHUB_TOKEN:
+        os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
+        # ChatLiteLLM is built without an explicit api_key below, and LiteLLM's
+        # `github/` provider looks the key up under GITHUB_API_KEY.
+        os.environ.setdefault("GITHUB_API_KEY", settings.GITHUB_TOKEN)
+
+    if "/" in model:
+        return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)
+
+    return ChatOpenAI(
+        model=model,
+        temperature=temperature,
+        api_key=_api_key_for_model(model),
+        callbacks=callbacks,
+    )
+
+
+async def embed(text: str) -> list[float]:
+    """Return the embedding vector of *text* using ``settings.LLM_EMBED_MODEL``.
+
+    Provider-prefixed model names go through LiteLLM; plain names use the
+    OpenAI client with ``settings.OPENAI_API_KEY``.
+    """
+    model = settings.LLM_EMBED_MODEL
+
+    # Any provider-prefixed name (including "github_copilot/...") contains "/".
+    if "/" in model:
+        response = await litellm.aembedding(model=model, input=[text])
+        return response.data[0]["embedding"]
+
+    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
+    response = await client.embeddings.create(model=model, input=text)
+    return response.data[0].embedding
diff --git a/shared/config.py b/shared/config.py
index dd41ee2..567d8fe 100644
--- a/shared/config.py
+++ b/shared/config.py
@@ -62,6 +62,7 @@ class Settings(BaseSettings):
     ANTHROPIC_API_KEY: str = ""
     GOOGLE_API_KEY: str = ""
     CEREBRAS_API_KEY: str = ""
+    GITHUB_TOKEN: str = ""
 
     LLM_MODEL: str = "gpt-4o"
     LLM_EMBED_MODEL: str = "text-embedding-3-small"
diff --git a/shared/llm.py b/shared/llm.py
index e2ed26f..cae1491 100644
--- a/shared/llm.py
+++ b/shared/llm.py
@@ -33,6 +33,8 @@ def _api_key_for_model(model: str) -> str | None:
         return settings.GOOGLE_API_KEY or None
     if model.startswith("cerebras/"):
         return settings.CEREBRAS_API_KEY or None
+    if model.startswith("github/"):
+        return settings.GITHUB_TOKEN or None
     if model.startswith("github_copilot/"):
         return None
     return settings.OPENAI_API_KEY or None
@@ -49,6 +51,9 @@ def get_llm(
     if settings.GITHUB_COPILOT_TOKEN_DIR:
         os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
 
+    if settings.GITHUB_TOKEN:
+        os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
+
     if "/" in model:
         return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)