From 3cc32569d9566f70a0b699c9da7f9db27945b13e Mon Sep 17 00:00:00 2001
From: Roberto Musso <roberto.musso@hpe.com>
Date: Tue, 7 Apr 2026 11:21:42 +0200
Subject: [PATCH] chore(tests): remove Langfuse scoring from preprocess tests

Langfuse scoring is only meaningful for LLM-backed steps. The preprocess
tests exercise deterministic Python, so scoring them adds no value.
Scoring is kept only for the detect tests.

- test_preprocessors.py: drop the _lf_score call from test_preprocess and
  simplify the _run_assertions return type to a list of failure messages
- cases.yaml: remove score_name from all op=preprocess entries

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/fixtures/preprocessors/cases.yaml |  6 ------
 tests/test_preprocessors.py             | 15 ++++-----------
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/tests/fixtures/preprocessors/cases.yaml b/tests/fixtures/preprocessors/cases.yaml
index f40e84b..75763aa 100644
--- a/tests/fixtures/preprocessors/cases.yaml
+++ b/tests/fixtures/preprocessors/cases.yaml
@@ -66,7 +66,6 @@ cases:
 
   - id: "1.5"
     description: "Email: strip HTML tags"
-    score_name: preprocess.email_strip
     file: email_action.html
     op: preprocess
     input_content_type: email_html
@@ -77,7 +76,6 @@ cases:
 
   - id: "1.6"
     description: "Email: extract metadata (Subject + From)"
-    score_name: preprocess.email_metadata
     file: email_action.html
     op: preprocess
     input_content_type: email_html
@@ -86,7 +84,6 @@ cases:
 
   - id: "1.7"
     description: "Email: split thread — solo ultimo messaggio"
-    score_name: preprocess.email_thread
     file: email_thread.html
     op: preprocess
     input_content_type: email_html
@@ -96,7 +93,6 @@ cases:
 
   - id: "1.8"
     description: "Email: singolo messaggio senza thread"
-    score_name: preprocess.email_single
     file: email_single.html
     op: preprocess
     input_content_type: email_html
@@ -105,7 +101,6 @@ cases:
 
   - id: "1.9"
     description: "Email: HTML pesante con table layout"
-    score_name: preprocess.email_heavy_html
     file: email_heavy.html
     op: preprocess
     input_content_type: email_html
@@ -118,7 +113,6 @@ cases:
 
   - id: "1.10"
     description: "Fallback: file sconosciuto → testo restituito"
-    score_name: preprocess.fallback
     file: fallback.txt
     op: preprocess
     input_content_type: unknown
diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py
index 00dcff8..9ddc2a5 100644
--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -75,11 +75,8 @@ def _lf_score(score_name: str, value: float, comment: str = "") -> None:
 
 # ── Assertion engine ──────────────────────────────────────────────────
 
-def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[float, list[str]]:
-    """Run all assertions declared in the YAML case.
-
-    Returns (score 0.0–1.0, list of failure messages).
-    """
+def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]:
+    """Run all assertions declared in the YAML case. Returns failure messages."""
     failures: list[str] = []
 
     if assertions.get("no_html_tags"):
@@ -124,8 +121,7 @@ def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[
             f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}"
         )
 
-    score = 1.0 if not failures else 0.0
-    return score, failures
+    return failures
 
 
 # ── Parametrized: detect ──────────────────────────────────────────────
@@ -167,10 +163,7 @@ def test_preprocess(case: dict) -> None:
     result = preprocess(case["input_content_type"], raw)
 
     assertions = case.get("assertions", {})
-    score, failures = _run_assertions(assertions, result, raw)
-
-    comment = "; ".join(failures) if failures else f"len={len(result.clean_text)}"
-    _lf_score(case["score_name"], score, comment)
+    failures = _run_assertions(assertions, result, raw)
 
     assert not failures, (
         f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"