From dcd14220ca2f7325b72748a9fa1cf70fe3712abe Mon Sep 17 00:00:00 2001 From: Roberto Musso Date: Tue, 7 Apr 2026 11:30:38 +0200 Subject: [PATCH] refactor(tests): simplify YAML fixture schema and test runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit YAML: rimosse op/description/score_name/assertions block — ora detect/process come chiave diretta, assertions piatte sullo stesso livello del caso. Runner: eliminato _run_assertions engine, assertions inline in test_preprocess. Riduzione da ~170 a ~75 righe totali tra YAML + test. Co-Authored-By: Claude Sonnet 4.6 --- tests/fixtures/preprocessors/cases.yaml | 166 ++++++++-------------- tests/test_preprocessors.py | 174 +++++++----------------- 2 files changed, 106 insertions(+), 234 deletions(-) diff --git a/tests/fixtures/preprocessors/cases.yaml b/tests/fixtures/preprocessors/cases.yaml index 75763aa..594d532 100644 --- a/tests/fixtures/preprocessors/cases.yaml +++ b/tests/fixtures/preprocessors/cases.yaml @@ -1,121 +1,71 @@ -# Preprocessor test cases — Step 1 (Local Agent V2) +# Preprocessor test cases # -# Schema per caso: -# id: "1.N" -# description: str -# score_name: str # nome score inviato a Langfuse +# detect: → chiama detect_content_type(filename, content) +# process: → chiama preprocess(content_type, content) # -# Sorgente contenuto (una delle due): -# file: # letto come testo UTF-8 -# generate: binary_noise # contenuto generato dal runner (per test binari) +# Sorgente: file: oppure generate: binary_noise +# filename: override del nome file passato a detect (default: valore di file:) # -# Per op=detect: -# op: detect -# input_filename: str # filename passato a detect_content_type -# expected_content_type: str -# -# Per op=preprocess: -# op: preprocess -# input_content_type: str # content_type passato a preprocess() -# assertions: -# no_html_tags: bool -# min_length: int -# compression_ratio_lt: float # len(clean) / len(raw) < soglia -# metadata_keys: [str, ...] # chiavi che devono essere in metadata -# contains: str | [str, ...] # substring(s) presenti in clean_text -# not_contains: str | [str, ...] # substring(s) assenti da clean_text -# content_type: str # valore atteso di result.content_type +# Assertions piatte (solo per process): +# no_html: true clean_text senza tag HTML +# min_chars: N len(clean_text) >= N +# ratio_lt: F len(clean) / len(raw) < F +# has_meta: [k, ...] chiavi presenti in metadata +# contains: str | [str] substring(s) presenti in clean_text +# excludes: str | [str] substring(s) assenti da clean_text +# content_type: str result.content_type == questo valore -cases: +- id: "1.1" + file: email_action.html + filename: email_export.html + detect: email_html - # ── Detection tests ──────────────────────────────────────────────── +- id: "1.2" + file: generic_page.html + filename: index.html + detect: generic_html - - id: "1.1" - description: "Detect email HTML" - score_name: preprocess.detect_email - file: email_action.html - op: detect - input_filename: email_export.html - expected_content_type: email_html +- id: "1.3" + file: notes.txt + detect: plain_text - - id: "1.2" - description: "Detect generic HTML" - score_name: preprocess.detect_generic - file: generic_page.html - op: detect - input_filename: index.html - expected_content_type: generic_html +- id: "1.4" + generate: binary_noise + filename: archive.xyz + detect: unknown - - id: "1.3" - description: "Detect plain text" - score_name: preprocess.detect_text - file: notes.txt - op: detect - input_filename: notes.txt - expected_content_type: plain_text +- id: "1.5" + file: email_action.html + process: email_html + no_html: true + min_chars: 50 + ratio_lt: 0.8 - - id: "1.4" - description: "Detect unknown (binary-like content)" - score_name: preprocess.detect_unknown - generate: binary_noise - op: detect - input_filename: archive.xyz - expected_content_type: unknown +- id: "1.6" + file: email_action.html + process: email_html + has_meta: [subject, from] - # ── Preprocess tests ─────────────────────────────────────────────── +- id: "1.7" + file: email_thread.html + process: email_html + contains: "Sure, I'll handle the deploy" + excludes: "Let's plan the deploy" - - id: "1.5" - description: "Email: strip HTML tags" - file: email_action.html - op: preprocess - input_content_type: email_html - assertions: - no_html_tags: true - min_length: 50 - compression_ratio_lt: 0.8 +- id: "1.8" + file: email_single.html + process: email_html + contains: "deploy is done" - - id: "1.6" - description: "Email: extract metadata (Subject + From)" - file: email_action.html - op: preprocess - input_content_type: email_html - assertions: - metadata_keys: [subject, from] +- id: "1.9" + file: email_heavy.html + process: email_html + no_html: true + min_chars: 30 + excludes: [border-collapse, font-size] - - id: "1.7" - description: "Email: split thread — solo ultimo messaggio" - file: email_thread.html - op: preprocess - input_content_type: email_html - assertions: - contains: "Sure, I'll handle the deploy" - not_contains: "Let's plan the deploy" - - - id: "1.8" - description: "Email: singolo messaggio senza thread" - file: email_single.html - op: preprocess - input_content_type: email_html - assertions: - contains: "deploy is done" - - - id: "1.9" - description: "Email: HTML pesante con table layout" - file: email_heavy.html - op: preprocess - input_content_type: email_html - assertions: - no_html_tags: true - min_length: 30 - not_contains: - - "border-collapse" - - "font-size" - - - id: "1.10" - description: "Fallback: file sconosciuto → testo restituito" - file: fallback.txt - op: preprocess - input_content_type: unknown - assertions: - min_length: 1 - content_type: unknown +- id: "1.10" + file: fallback.txt + process: unknown + min_chars: 1 + content_type: unknown diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py index 9ddc2a5..95440b1 100644 --- a/tests/test_preprocessors.py +++ b/tests/test_preprocessors.py @@ -1,26 +1,15 @@ """Tests for the preprocessor system (Step 1 — Local Agent V2). -Fixtures are driven by: - tests/fixtures/preprocessors/cases.yaml — test case definitions - tests/fixtures/preprocessors/data/ — input files (HTML, txt, ...) +Fixtures: tests/fixtures/preprocessors/cases.yaml + data/ Run: pytest tests/test_preprocessors.py -v - - # Only detection tests - pytest tests/test_preprocessors.py -v -k detect - - # Only preprocess tests - pytest tests/test_preprocessors.py -v -k preprocess - -Langfuse scores are sent when LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY are set. """ from __future__ import annotations import re from pathlib import Path -from typing import Any import pytest import yaml @@ -28,144 +17,77 @@ import yaml from app.core.langfuse_client import get_langfuse from app.core.preprocessors import detect_content_type, preprocess -# ── Paths ────────────────────────────────────────────────────────────── +_DATA_DIR = Path(__file__).parent / "fixtures" / "preprocessors" / "data" +_CASES_FILE = Path(__file__).parent / "fixtures" / "preprocessors" / "cases.yaml" -_FIXTURES_DIR = Path(__file__).parent / "fixtures" / "preprocessors" -_DATA_DIR = _FIXTURES_DIR / "data" -_CASES_FILE = _FIXTURES_DIR / "cases.yaml" - -# ── Content generators ───────────────────────────────────────────────── - -_GENERATORS: dict[str, str] = { - # High ratio of non-printable chars → triggers "unknown" heuristic +_GENERATORS = { "binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20, } -def _load_cases() -> list[dict]: - with _CASES_FILE.open(encoding="utf-8") as f: - return yaml.safe_load(f)["cases"] +def _cases(): + return yaml.safe_load(_CASES_FILE.read_text(encoding="utf-8")) -def _read_content(case: dict) -> str: +def _content(case: dict) -> str: if "generate" in case: - key = case["generate"] - if key not in _GENERATORS: - raise ValueError(f"Unknown generator '{key}' in case {case['id']}") - return _GENERATORS[key] - file_path = _DATA_DIR / case["file"] - return file_path.read_text(encoding="utf-8") + return _GENERATORS[case["generate"]] + return (_DATA_DIR / case["file"]).read_text(encoding="utf-8") -# ── Langfuse helper ─────────────────────────────────────────────────── - -def _lf_score(score_name: str, value: float, comment: str = "") -> None: +def _lf_score(name: str, value: float, comment: str = "") -> None: lf = get_langfuse() if lf: - trace = lf.trace(name=f"eval-{score_name}") - lf.score( - trace_id=trace.id, - name=score_name, - value=value, - data_type="NUMERIC", - comment=comment, - ) + trace = lf.trace(name=f"eval-{name}") + lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC", comment=comment) lf.flush() -# ── Assertion engine ────────────────────────────────────────────────── +# ── detect ──────────────────────────────────────────────────────────── -def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]: - """Run all assertions declared in the YAML case. Returns failure messages.""" - failures: list[str] = [] - - if assertions.get("no_html_tags"): - if re.search(r"<[^>]+>", result.clean_text): - failures.append("clean_text still contains HTML tags") - - min_len = assertions.get("min_length") - if min_len is not None: - if len(result.clean_text) < min_len: - failures.append( - f"clean_text too short: {len(result.clean_text)} < {min_len}" - ) - - ratio_lt = assertions.get("compression_ratio_lt") - if ratio_lt is not None and len(raw) > 0: - ratio = len(result.clean_text) / len(raw) - if ratio >= ratio_lt: - failures.append(f"compression ratio {ratio:.2f} >= {ratio_lt}") - - meta_keys = assertions.get("metadata_keys", []) - for key in meta_keys: - if not result.metadata.get(key): - failures.append(f"metadata missing key '{key}' (got {result.metadata})") - - contains = assertions.get("contains") - if contains: - items = [contains] if isinstance(contains, str) else contains - for item in items: - if item not in result.clean_text: - failures.append(f"clean_text missing expected substring: {item!r}") - - not_contains = assertions.get("not_contains") - if not_contains: - items = [not_contains] if isinstance(not_contains, str) else not_contains - for item in items: - if item in result.clean_text: - failures.append(f"clean_text contains forbidden substring: {item!r}") - - expected_ct = assertions.get("content_type") - if expected_ct and result.content_type != expected_ct: - failures.append( - f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}" - ) - - return failures +_detect = [c for c in _cases() if "detect" in c] -# ── Parametrized: detect ────────────────────────────────────────────── - -_detect_cases = [c for c in _load_cases() if c["op"] == "detect"] - - -@pytest.mark.parametrize( - "case", - _detect_cases, - ids=[c["id"] for c in _detect_cases], -) +@pytest.mark.parametrize("case", _detect, ids=[c["id"] for c in _detect]) def test_detect(case: dict) -> None: - raw = _read_content(case) - ct = detect_content_type(case["input_filename"], raw) - - expected = case["expected_content_type"] - score = 1.0 if ct == expected else 0.0 - _lf_score(case["score_name"], score, f"got={ct}, expected={expected}") - - assert ct == expected, ( - f"[{case['id']}] {case['description']}: " - f"expected content_type={expected!r}, got {ct!r}" - ) + raw = _content(case) + filename = case.get("filename", case.get("file", "")) + ct = detect_content_type(filename, raw) + expected = case["detect"] + _lf_score(f"preprocess.detect.{case['id']}", 1.0 if ct == expected else 0.0) + assert ct == expected, f"[{case['id']}] expected {expected!r}, got {ct!r}" -# ── Parametrized: preprocess ────────────────────────────────────────── +# ── preprocess ──────────────────────────────────────────────────────── -_preprocess_cases = [c for c in _load_cases() if c["op"] == "preprocess"] +_process = [c for c in _cases() if "process" in c] -@pytest.mark.parametrize( - "case", - _preprocess_cases, - ids=[c["id"] for c in _preprocess_cases], -) +@pytest.mark.parametrize("case", _process, ids=[c["id"] for c in _process]) def test_preprocess(case: dict) -> None: - raw = _read_content(case) - result = preprocess(case["input_content_type"], raw) + raw = _content(case) + result = preprocess(case["process"], raw) - assertions = case.get("assertions", {}) - failures = _run_assertions(assertions, result, raw) + if case.get("no_html"): + assert not re.search(r"<[^>]+>", result.clean_text), "clean_text contains HTML tags" - assert not failures, ( - f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n" - + "\n".join(f" • {f}" for f in failures) - ) + if "min_chars" in case: + assert len(result.clean_text) >= case["min_chars"], \ + f"clean_text too short: {len(result.clean_text)} < {case['min_chars']}" + + if "ratio_lt" in case: + ratio = len(result.clean_text) / len(raw) + assert ratio < case["ratio_lt"], f"compression ratio {ratio:.2f} >= {case['ratio_lt']}" + + for key in case.get("has_meta", []): + assert result.metadata.get(key), f"metadata missing {key!r} (got {result.metadata})" + + for item in ([case["contains"]] if isinstance(case.get("contains"), str) else case.get("contains", [])): + assert item in result.clean_text, f"clean_text missing {item!r}" + + for item in ([case["excludes"]] if isinstance(case.get("excludes"), str) else case.get("excludes", [])): + assert item not in result.clean_text, f"clean_text contains forbidden {item!r}" + + if "content_type" in case: + assert result.content_type == case["content_type"], \ + f"expected content_type {case['content_type']!r}, got {result.content_type!r}"