chore(tests): remove Langfuse scoring from preprocess tests

Scoring is only meaningful for LLM-backed steps. Preprocess tests are deterministic Python, so scores add no value there; Langfuse scoring is kept only for the detect tests.

- test_preprocess: drop the _lf_score call and simplify the _run_assertions return type
- cases.yaml: remove score_name from all op=preprocess entries

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 11:21:42 +02:00
parent bf445ac2ce
commit 3cc32569d9
2 changed files with 4 additions and 17 deletions
--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -75,11 +75,8 @@ def _lf_score(score_name: str, value: float, comment: str = "") -> None:

 # ── Assertion engine ──────────────────────────────────────────────────

-def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[float, list[str]]:
-    """Run all assertions declared in the YAML case.
-
-    Returns (score 0.0–1.0, list of failure messages).
-    """
+def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]:
+    """Run all assertions declared in the YAML case. Returns failure messages."""
    failures: list[str] = []

    if assertions.get("no_html_tags"):
@@ -124,8 +121,7 @@ def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[
            f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}"
        )

-    score = 1.0 if not failures else 0.0
-    return score, failures
+    return failures


 # ── Parametrized: detect ──────────────────────────────────────────────
@@ -167,10 +163,7 @@ def test_preprocess(case: dict) -> None:
    result = preprocess(case["input_content_type"], raw)

    assertions = case.get("assertions", {})
-    score, failures = _run_assertions(assertions, result, raw)
-
-    comment = "; ".join(failures) if failures else f"len={len(result.clean_text)}"
-    _lf_score(case["score_name"], score, comment)
+    failures = _run_assertions(assertions, result, raw)

    assert not failures, (
        f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"