# api/services/batch-agent/eval/fixtures/freelance_invoices.yaml

# Fixture: freelance-invoices
# Tests extraction of tasks, notes, and timelines from
# invoices and meeting notes typical of a freelance workflow.

# Fixture identity: short name plus a human-readable summary.
name: freelance-invoices
description: >
  Extract tasks, notes, and timeline events from Italian
  freelance invoices and meeting notes. Tests project matching,
  priority mapping, and bilingual content handling.

# Sample-file directory, the record types named by this fixture,
# and the file extensions listed for it.
directory: sample_files/invoices
data_types:
  - tasks
  - notes
  - timelines
file_extensions:
  - txt
  - md

# Seed data: records treated as already present in the "database".
# Only `projects` is populated; tasks, notes, and timelines are
# explicitly empty lists.
seed_records:
  projects:
    - id: 'proj-web-redesign'
      name: 'Redesign Sito Web Corporate'
      status: 'active'
      aiSummary: 'Corporate website redesign for Studio Architettura Bianchi'
    - id: 'proj-ecommerce'
      name: 'E-Commerce FashionStore'
      status: 'active'
      aiSummary: 'Next.js e-commerce platform for FashionStore srl'
  tasks: []
  notes: []
  timelines: []

# Prompt variations to compare.
# Each key names one variant; each value is a literal block scalar
# (`|`), so line breaks inside the prompt text are preserved as written.
prompt_variants:
  # baseline: short English instruction covering tasks, notes, and
  # timelines, with isAiSuggested=1 on every record.
  baseline: |
    Extract action items as tasks and summaries as notes.
    For timelines, extract any mentioned dates and deadlines.
    Set isAiSuggested=1 on every record.

  # detailed_italian: Italian-language instruction with explicit
  # priority mapping (high/medium/low), status mapping (done/todo),
  # guidance for notes, and Unix-millisecond timestamps for timelines.
  detailed_italian: |
    Estrai i dati dai file come segue:
    - TASK: ogni azione da fare, deliverable, o item con scadenza.
      Mappa "URGENTE" o "ALTA PRIORITÀ" → priority: high.
      Mappa "media priorità" → priority: medium.
      Mappa "bassa priorità" → priority: low.
      Se un item è marcato come "completato" o [x], impostalo status: done.
      Altrimenti status: todo.
    - NOTE: riassunti di meeting, decisioni prese, note tecniche.
      Il titolo deve essere descrittivo. Il content deve includere tutti i dettagli.
    - TIMELINE: date di scadenza, milestone, meeting futuri.
      Formato data: timestamp Unix in millisecondi.
    Imposta sempre isAiSuggested=1.

  # minimal: restricts extraction to high-priority tasks; notes and
  # timelines are ignored unless explicitly marked as important.
  minimal: |
    Extract only high-priority action items as tasks.
    Ignore notes and timelines unless explicitly marked as important.
    Set isAiSuggested=1.

# Expected extractions: the records the agent SHOULD produce.
# Entries carry key fields only, because the scorer uses fuzzy matching.
expected:
  tasks:
    - title: 'Sviluppo frontend React'
      priority: 'high'
      status: 'todo'
    - title: 'Integrazione API backend'
      priority: 'medium'
      status: 'todo'
    # No priority is listed for this entry.
    # NOTE(review): presumably priority is left unscored here — confirm.
    - title: 'Testing cross-browser e fix bug responsive'
      status: 'todo'
    - title: 'Preparare wireframe homepage'
      priority: 'high'
      status: 'todo'
    - title: 'Setup progetto Next.js e configurare CI/CD'
      priority: 'medium'
      status: 'todo'
    - title: 'Ricerca plugin Stripe per gestione abbonamenti'
      priority: 'low'
      status: 'todo'

  # Notes and timelines are listed by title alone.
  notes:
    - title: 'Meeting Kickoff Progetto E-Commerce'

  timelines:
    - title: 'MVP E-Commerce pronto'
    - title: 'Meeting di revisione'

# Models to test (can be overridden via CLI --models).
# The list is empty, so this fixture names no models itself.
# NOTE(review): presumably the runner then relies on --models or its
# own default — confirm against the eval harness.
models: []