refactor(tests): simplify YAML fixture schema and test runner
YAML: rimosse le chiavi op/description/score_name e il blocco assertions — ora detect/process sono chiavi dirette e le assertions sono piatte, sullo stesso livello del caso.
Runner: eliminato l'engine _run_assertions; le assertions sono inline in test_preprocess.
Riduzione da ~170 a ~75 righe totali tra YAML e test.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
166
tests/fixtures/preprocessors/cases.yaml
vendored
166
tests/fixtures/preprocessors/cases.yaml
vendored
@@ -1,121 +1,71 @@
|
||||
# Preprocessor test cases — Step 1 (Local Agent V2)
|
||||
# Preprocessor test cases
|
||||
#
|
||||
# Schema per caso:
|
||||
# id: "1.N"
|
||||
# description: str
|
||||
# score_name: str # nome score inviato a Langfuse
|
||||
# detect: <expected_type> → chiama detect_content_type(filename, content)
|
||||
# process: <content_type> → chiama preprocess(content_type, content)
|
||||
#
|
||||
# Sorgente contenuto (una delle due):
|
||||
# file: <nome file in data/> # letto come testo UTF-8
|
||||
# generate: binary_noise # contenuto generato dal runner (per test binari)
|
||||
# Sorgente: file: <nome in data/> oppure generate: binary_noise
|
||||
# filename: override del nome file passato a detect (default: valore di file:)
|
||||
#
|
||||
# Per op=detect:
|
||||
# op: detect
|
||||
# input_filename: str # filename passato a detect_content_type
|
||||
# expected_content_type: str
|
||||
#
|
||||
# Per op=preprocess:
|
||||
# op: preprocess
|
||||
# input_content_type: str # content_type passato a preprocess()
|
||||
# assertions:
|
||||
# no_html_tags: bool
|
||||
# min_length: int
|
||||
# compression_ratio_lt: float # len(clean) / len(raw) < soglia
|
||||
# metadata_keys: [str, ...] # chiavi che devono essere in metadata
|
||||
# contains: str | [str, ...] # substring(s) presenti in clean_text
|
||||
# not_contains: str | [str, ...] # substring(s) assenti da clean_text
|
||||
# content_type: str # valore atteso di result.content_type
|
||||
# Assertions piatte (solo per process):
|
||||
# no_html: true clean_text senza tag HTML
|
||||
# min_chars: N len(clean_text) >= N
|
||||
# ratio_lt: F len(clean) / len(raw) < F
|
||||
# has_meta: [k, ...] chiavi presenti in metadata
|
||||
# contains: str | [str] substring(s) presenti in clean_text
|
||||
# excludes: str | [str] substring(s) assenti da clean_text
|
||||
# content_type: str result.content_type == questo valore
|
||||
|
||||
cases:
|
||||
- id: "1.1"
|
||||
file: email_action.html
|
||||
filename: email_export.html
|
||||
detect: email_html
|
||||
|
||||
# ── Detection tests ────────────────────────────────────────────────
|
||||
- id: "1.2"
|
||||
file: generic_page.html
|
||||
filename: index.html
|
||||
detect: generic_html
|
||||
|
||||
- id: "1.1"
|
||||
description: "Detect email HTML"
|
||||
score_name: preprocess.detect_email
|
||||
file: email_action.html
|
||||
op: detect
|
||||
input_filename: email_export.html
|
||||
expected_content_type: email_html
|
||||
- id: "1.3"
|
||||
file: notes.txt
|
||||
detect: plain_text
|
||||
|
||||
- id: "1.2"
|
||||
description: "Detect generic HTML"
|
||||
score_name: preprocess.detect_generic
|
||||
file: generic_page.html
|
||||
op: detect
|
||||
input_filename: index.html
|
||||
expected_content_type: generic_html
|
||||
- id: "1.4"
|
||||
generate: binary_noise
|
||||
filename: archive.xyz
|
||||
detect: unknown
|
||||
|
||||
- id: "1.3"
|
||||
description: "Detect plain text"
|
||||
score_name: preprocess.detect_text
|
||||
file: notes.txt
|
||||
op: detect
|
||||
input_filename: notes.txt
|
||||
expected_content_type: plain_text
|
||||
- id: "1.5"
|
||||
file: email_action.html
|
||||
process: email_html
|
||||
no_html: true
|
||||
min_chars: 50
|
||||
ratio_lt: 0.8
|
||||
|
||||
- id: "1.4"
|
||||
description: "Detect unknown (binary-like content)"
|
||||
score_name: preprocess.detect_unknown
|
||||
generate: binary_noise
|
||||
op: detect
|
||||
input_filename: archive.xyz
|
||||
expected_content_type: unknown
|
||||
- id: "1.6"
|
||||
file: email_action.html
|
||||
process: email_html
|
||||
has_meta: [subject, from]
|
||||
|
||||
# ── Preprocess tests ───────────────────────────────────────────────
|
||||
- id: "1.7"
|
||||
file: email_thread.html
|
||||
process: email_html
|
||||
contains: "Sure, I'll handle the deploy"
|
||||
excludes: "Let's plan the deploy"
|
||||
|
||||
- id: "1.5"
|
||||
description: "Email: strip HTML tags"
|
||||
file: email_action.html
|
||||
op: preprocess
|
||||
input_content_type: email_html
|
||||
assertions:
|
||||
no_html_tags: true
|
||||
min_length: 50
|
||||
compression_ratio_lt: 0.8
|
||||
- id: "1.8"
|
||||
file: email_single.html
|
||||
process: email_html
|
||||
contains: "deploy is done"
|
||||
|
||||
- id: "1.6"
|
||||
description: "Email: extract metadata (Subject + From)"
|
||||
file: email_action.html
|
||||
op: preprocess
|
||||
input_content_type: email_html
|
||||
assertions:
|
||||
metadata_keys: [subject, from]
|
||||
- id: "1.9"
|
||||
file: email_heavy.html
|
||||
process: email_html
|
||||
no_html: true
|
||||
min_chars: 30
|
||||
excludes: [border-collapse, font-size]
|
||||
|
||||
- id: "1.7"
|
||||
description: "Email: split thread — solo ultimo messaggio"
|
||||
file: email_thread.html
|
||||
op: preprocess
|
||||
input_content_type: email_html
|
||||
assertions:
|
||||
contains: "Sure, I'll handle the deploy"
|
||||
not_contains: "Let's plan the deploy"
|
||||
|
||||
- id: "1.8"
|
||||
description: "Email: singolo messaggio senza thread"
|
||||
file: email_single.html
|
||||
op: preprocess
|
||||
input_content_type: email_html
|
||||
assertions:
|
||||
contains: "deploy is done"
|
||||
|
||||
- id: "1.9"
|
||||
description: "Email: HTML pesante con table layout"
|
||||
file: email_heavy.html
|
||||
op: preprocess
|
||||
input_content_type: email_html
|
||||
assertions:
|
||||
no_html_tags: true
|
||||
min_length: 30
|
||||
not_contains:
|
||||
- "border-collapse"
|
||||
- "font-size"
|
||||
|
||||
- id: "1.10"
|
||||
description: "Fallback: file sconosciuto → testo restituito"
|
||||
file: fallback.txt
|
||||
op: preprocess
|
||||
input_content_type: unknown
|
||||
assertions:
|
||||
min_length: 1
|
||||
content_type: unknown
|
||||
- id: "1.10"
|
||||
file: fallback.txt
|
||||
process: unknown
|
||||
min_chars: 1
|
||||
content_type: unknown
|
||||
|
||||
Reference in New Issue
Block a user