chore(tests): remove Langfuse scoring from preprocess tests
Scoring is only meaningful for LLM-backed steps. Preprocess tests are deterministic Python, so scores add no value. Scoring is kept only for detect tests.

- test_preprocess: drop _lf_score call, simplify _run_assertions return type
- cases.yaml: remove score_name from all op=preprocess entries

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
6
tests/fixtures/preprocessors/cases.yaml
vendored
6
tests/fixtures/preprocessors/cases.yaml
vendored
@@ -66,7 +66,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.5"
|
- id: "1.5"
|
||||||
description: "Email: strip HTML tags"
|
description: "Email: strip HTML tags"
|
||||||
score_name: preprocess.email_strip
|
|
||||||
file: email_action.html
|
file: email_action.html
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: email_html
|
input_content_type: email_html
|
||||||
@@ -77,7 +76,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.6"
|
- id: "1.6"
|
||||||
description: "Email: extract metadata (Subject + From)"
|
description: "Email: extract metadata (Subject + From)"
|
||||||
score_name: preprocess.email_metadata
|
|
||||||
file: email_action.html
|
file: email_action.html
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: email_html
|
input_content_type: email_html
|
||||||
@@ -86,7 +84,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.7"
|
- id: "1.7"
|
||||||
description: "Email: split thread — solo ultimo messaggio"
|
description: "Email: split thread — solo ultimo messaggio"
|
||||||
score_name: preprocess.email_thread
|
|
||||||
file: email_thread.html
|
file: email_thread.html
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: email_html
|
input_content_type: email_html
|
||||||
@@ -96,7 +93,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.8"
|
- id: "1.8"
|
||||||
description: "Email: singolo messaggio senza thread"
|
description: "Email: singolo messaggio senza thread"
|
||||||
score_name: preprocess.email_single
|
|
||||||
file: email_single.html
|
file: email_single.html
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: email_html
|
input_content_type: email_html
|
||||||
@@ -105,7 +101,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.9"
|
- id: "1.9"
|
||||||
description: "Email: HTML pesante con table layout"
|
description: "Email: HTML pesante con table layout"
|
||||||
score_name: preprocess.email_heavy_html
|
|
||||||
file: email_heavy.html
|
file: email_heavy.html
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: email_html
|
input_content_type: email_html
|
||||||
@@ -118,7 +113,6 @@ cases:
|
|||||||
|
|
||||||
- id: "1.10"
|
- id: "1.10"
|
||||||
description: "Fallback: file sconosciuto → testo restituito"
|
description: "Fallback: file sconosciuto → testo restituito"
|
||||||
score_name: preprocess.fallback
|
|
||||||
file: fallback.txt
|
file: fallback.txt
|
||||||
op: preprocess
|
op: preprocess
|
||||||
input_content_type: unknown
|
input_content_type: unknown
|
||||||
|
|||||||
@@ -75,11 +75,8 @@ def _lf_score(score_name: str, value: float, comment: str = "") -> None:
|
|||||||
|
|
||||||
# ── Assertion engine ──────────────────────────────────────────────────
|
# ── Assertion engine ──────────────────────────────────────────────────
|
||||||
|
|
||||||
def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[float, list[str]]:
|
def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]:
|
||||||
"""Run all assertions declared in the YAML case.
|
"""Run all assertions declared in the YAML case. Returns failure messages."""
|
||||||
|
|
||||||
Returns (score 0.0–1.0, list of failure messages).
|
|
||||||
"""
|
|
||||||
failures: list[str] = []
|
failures: list[str] = []
|
||||||
|
|
||||||
if assertions.get("no_html_tags"):
|
if assertions.get("no_html_tags"):
|
||||||
@@ -124,8 +121,7 @@ def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> tuple[
|
|||||||
f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}"
|
f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}"
|
||||||
)
|
)
|
||||||
|
|
||||||
score = 1.0 if not failures else 0.0
|
return failures
|
||||||
return score, failures
|
|
||||||
|
|
||||||
|
|
||||||
# ── Parametrized: detect ──────────────────────────────────────────────
|
# ── Parametrized: detect ──────────────────────────────────────────────
|
||||||
@@ -167,10 +163,7 @@ def test_preprocess(case: dict) -> None:
|
|||||||
result = preprocess(case["input_content_type"], raw)
|
result = preprocess(case["input_content_type"], raw)
|
||||||
|
|
||||||
assertions = case.get("assertions", {})
|
assertions = case.get("assertions", {})
|
||||||
score, failures = _run_assertions(assertions, result, raw)
|
failures = _run_assertions(assertions, result, raw)
|
||||||
|
|
||||||
comment = "; ".join(failures) if failures else f"len={len(result.clean_text)}"
|
|
||||||
_lf_score(case["score_name"], score, comment)
|
|
||||||
|
|
||||||
assert not failures, (
|
assert not failures, (
|
||||||
f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"
|
f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user