# Journey V2 eval test cases — Step 4
#
# Each case simulates a complete journey session:
#   1. handle_journey_start is called with directory + data_types
#   2. handle_journey_message is called for each entry in user_messages
#   3. Assertions are evaluated on the final reply
#
# directory_files: list of {path, content_file} — content_file is relative to data/
#
# Assertion keys:
#   expect_question: true               → first reply must contain "?"
#   expect_done: true                   → final reply must have done=True
#   expect_valid_config: true           → agent_config must be parseable as AgentConfig with content_types > 0
#   expect_content_type_id: <id>        → AgentConfig.content_types must contain an entry with this id
#   expect_extraction_contains: <word>  → first content_type extraction_prompt must contain this word
#   expect_global_rules: true           → AgentConfig.global_rules must be non-empty

# Start only (no user messages): the first reply is also the final reply.
- id: "4.1"
  description: "Journey start explores directory, first reply contains a question"
  directory: "/test/emails"
  data_types: ["tasks", "notes", "timelines"]
  directory_files:
    - path: "/test/emails/outlook_export_2024.html"
      content_file: "email_action.html"
  user_messages: []
  score_name: "journey.start"
  expect_question: true

# Three user turns; the session must finish with a parseable config.
- id: "4.2"
  description: "Full 3-turn conversation produces a valid AgentConfig JSON"
  directory: "/test/emails"
  data_types: ["tasks", "notes", "timelines"]
  directory_files:
    - path: "/test/emails/email_backup.html"
      content_file: "email_action.html"
  user_messages:
    - "These are email exports from Outlook in HTML format"
    - "Create tasks for emails with direct action requests, notes for informational emails"
    - "Yes, that looks correct. No other rules."
  score_name: "journey.valid_json"
  expect_done: true
  expect_valid_config: true

# The resulting config must contain a content type with id "email_html".
- id: "4.3"
  description: "Journey detects email_html content type from directory exploration"
  directory: "/test/emails"
  data_types: ["tasks", "notes"]
  directory_files:
    - path: "/test/emails/message.html"
      content_file: "email_action.html"
  user_messages:
    - "HTML email backups from my mail client, exported from Outlook"
    - "Create tasks from emails that contain assignments or direct action items"
    - "Correct, no other rules needed"
  score_name: "journey.detect_email"
  expect_done: true
  expect_content_type_id: "email_html"

# Notes-only request; the extraction prompt must mention "note".
- id: "4.4"
  description: "Custom user rule (only notes, no tasks) reflected in extraction_prompt"
  directory: "/test/emails"
  data_types: ["notes"]
  directory_files:
    - path: "/test/emails/email.html"
      content_file: "email_info.html"
  user_messages:
    - "HTML emails from my work inbox"
    - "Create only notes from all emails — I do not want tasks or timelines to be created"
    - "Yes, exactly"
  score_name: "journey.custom_rules"
  expect_done: true
  expect_extraction_contains: "note"

# The last user message states a global rule; global_rules must be non-empty.
- id: "4.5"
  description: "Global rule (no project = no entity) appears in AgentConfig.global_rules"
  directory: "/test/emails"
  data_types: ["tasks", "notes"]
  directory_files:
    - path: "/test/emails/email.html"
      content_file: "email_action.html"
  user_messages:
    - "Email backups from Outlook"
    - "Create tasks from action request emails, notes from informational emails"
    - "If the email cannot be matched to any project, do not create any entity at all"
  score_name: "journey.global_rules"
  expect_done: true
  expect_global_rules: true