From 3cc32569d9566f70a0b699c9da7f9db27945b13e Mon Sep 17 00:00:00 2001 From: Roberto Musso Date: Tue, 7 Apr 2026 11:21:42 +0200 Subject: [PATCH] chore(tests): remove Langfuse scoring from preprocess tests Scoring is only meaningful for LLM-backed steps. Preprocess tests are deterministic Python, so scores add no value. Kept only for detect tests. - test_preprocess: drop _lf_score call, simplify _run_assertions return type - cases.yaml: remove score_name from all op=preprocess entries Co-Authored-By: Claude Sonnet 4.6 --- tests/fixtures/preprocessors/cases.yaml | 6 ------ tests/test_preprocessors.py | 15 ++++----------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/tests/fixtures/preprocessors/cases.yaml b/tests/fixtures/preprocessors/cases.yaml index f40e84b..75763aa 100644 --- a/tests/fixtures/preprocessors/cases.yaml +++ b/tests/fixtures/preprocessors/cases.yaml @@ -66,7 +66,6 @@ cases: - id: "1.5" description: "Email: strip HTML tags" - score_name: preprocess.email_strip file: email_action.html op: preprocess input_content_type: email_html @@ -77,7 +76,6 @@ cases: - id: "1.6" description: "Email: extract metadata (Subject + From)" - score_name: preprocess.email_metadata file: email_action.html op: preprocess input_content_type: email_html @@ -86,7 +84,6 @@ cases: - id: "1.7" description: "Email: split thread — solo ultimo messaggio" - score_name: preprocess.email_thread file: email_thread.html op: preprocess input_content_type: email_html @@ -96,7 +93,6 @@ cases: - id: "1.8" description: "Email: singolo messaggio senza thread" - score_name: preprocess.email_single file: email_single.html op: preprocess input_content_type: email_html @@ -105,7 +101,6 @@ cases: - id: "1.9" description: "Email: HTML pesante con table layout" - score_name: preprocess.email_heavy_html file: email_heavy.html op: preprocess input_content_type: email_html @@ -118,7 +113,6 @@ cases: - id: "1.10" description: "Fallback: file sconosciuto → testo restituito" - score_name: preprocess.fallback file: fallback.txt op: preprocess input_content_type: unknown diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py index 00dcff8..9ddc2a5 100644 --- a/tests/test_preprocessors.py +++ b/tests/test_preprocessors.py @@ -75,11 +75,8 @@ def _lf_score(score_name: str, value: float, comment: str = "") -> None: # ── Assertion engine ────────────────────────────────────────────────── -def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[float, list[str]]: - """Run all assertions declared in the YAML case. - - Returns (score 0.0–1.0, list of failure messages). - """ +def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]: + """Run all assertions declared in the YAML case. Returns failure messages.""" failures: list[str] = [] if assertions.get("no_html_tags"): @@ -124,8 +121,7 @@ def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[ f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}" ) - score = 1.0 if not failures else 0.0 - return score, failures + return failures # ── Parametrized: detect ────────────────────────────────────────────── @@ -167,10 +163,7 @@ def test_preprocess(case: dict) -> None: result = preprocess(case["input_content_type"], raw) assertions = case.get("assertions", {}) - score, failures = _run_assertions(assertions, result, raw) - - comment = "; ".join(failures) if failures else f"len={len(result.clean_text)}" - _lf_score(case["score_name"], score, comment) + failures = _run_assertions(assertions, result, raw) assert not failures, ( f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"