refactor(tests): simplify YAML fixture schema and test runner
YAML: rimossi op/description/score_name e il blocco assertions — ora detect/process sono chiavi dirette e le assertions sono piatte, sullo stesso livello del caso.
Runner: eliminato l'engine _run_assertions; le assertions sono inline in test_preprocess.
Riduzione da ~170 a ~75 righe totali tra YAML e test.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
166
tests/fixtures/preprocessors/cases.yaml
vendored
166
tests/fixtures/preprocessors/cases.yaml
vendored
@@ -1,121 +1,71 @@
|
|||||||
# Preprocessor test cases — Step 1 (Local Agent V2)
|
# Preprocessor test cases
|
||||||
#
|
#
|
||||||
# Schema per caso:
|
# detect: <expected_type> → chiama detect_content_type(filename, content)
|
||||||
# id: "1.N"
|
# process: <content_type> → chiama preprocess(content_type, content)
|
||||||
# description: str
|
|
||||||
# score_name: str # nome score inviato a Langfuse
|
|
||||||
#
|
#
|
||||||
# Sorgente contenuto (una delle due):
|
# Sorgente: file: <nome in data/> oppure generate: binary_noise
|
||||||
# file: <nome file in data/> # letto come testo UTF-8
|
# filename: override del nome file passato a detect (default: valore di file:)
|
||||||
# generate: binary_noise # contenuto generato dal runner (per test binari)
|
|
||||||
#
|
#
|
||||||
# Per op=detect:
|
# Assertions piatte (solo per process):
|
||||||
# op: detect
|
# no_html: true clean_text senza tag HTML
|
||||||
# input_filename: str # filename passato a detect_content_type
|
# min_chars: N len(clean_text) >= N
|
||||||
# expected_content_type: str
|
# ratio_lt: F len(clean) / len(raw) < F
|
||||||
#
|
# has_meta: [k, ...] chiavi presenti in metadata
|
||||||
# Per op=preprocess:
|
# contains: str | [str] substring(s) presenti in clean_text
|
||||||
# op: preprocess
|
# excludes: str | [str] substring(s) assenti da clean_text
|
||||||
# input_content_type: str # content_type passato a preprocess()
|
# content_type: str result.content_type == questo valore
|
||||||
# assertions:
|
|
||||||
# no_html_tags: bool
|
|
||||||
# min_length: int
|
|
||||||
# compression_ratio_lt: float # len(clean) / len(raw) < soglia
|
|
||||||
# metadata_keys: [str, ...] # chiavi che devono essere in metadata
|
|
||||||
# contains: str | [str, ...] # substring(s) presenti in clean_text
|
|
||||||
# not_contains: str | [str, ...] # substring(s) assenti da clean_text
|
|
||||||
# content_type: str # valore atteso di result.content_type
|
|
||||||
|
|
||||||
cases:
|
- id: "1.1"
|
||||||
|
file: email_action.html
|
||||||
|
filename: email_export.html
|
||||||
|
detect: email_html
|
||||||
|
|
||||||
# ── Detection tests ────────────────────────────────────────────────
|
- id: "1.2"
|
||||||
|
file: generic_page.html
|
||||||
|
filename: index.html
|
||||||
|
detect: generic_html
|
||||||
|
|
||||||
- id: "1.1"
|
- id: "1.3"
|
||||||
description: "Detect email HTML"
|
file: notes.txt
|
||||||
score_name: preprocess.detect_email
|
detect: plain_text
|
||||||
file: email_action.html
|
|
||||||
op: detect
|
|
||||||
input_filename: email_export.html
|
|
||||||
expected_content_type: email_html
|
|
||||||
|
|
||||||
- id: "1.2"
|
- id: "1.4"
|
||||||
description: "Detect generic HTML"
|
generate: binary_noise
|
||||||
score_name: preprocess.detect_generic
|
filename: archive.xyz
|
||||||
file: generic_page.html
|
detect: unknown
|
||||||
op: detect
|
|
||||||
input_filename: index.html
|
|
||||||
expected_content_type: generic_html
|
|
||||||
|
|
||||||
- id: "1.3"
|
- id: "1.5"
|
||||||
description: "Detect plain text"
|
file: email_action.html
|
||||||
score_name: preprocess.detect_text
|
process: email_html
|
||||||
file: notes.txt
|
no_html: true
|
||||||
op: detect
|
min_chars: 50
|
||||||
input_filename: notes.txt
|
ratio_lt: 0.8
|
||||||
expected_content_type: plain_text
|
|
||||||
|
|
||||||
- id: "1.4"
|
- id: "1.6"
|
||||||
description: "Detect unknown (binary-like content)"
|
file: email_action.html
|
||||||
score_name: preprocess.detect_unknown
|
process: email_html
|
||||||
generate: binary_noise
|
has_meta: [subject, from]
|
||||||
op: detect
|
|
||||||
input_filename: archive.xyz
|
|
||||||
expected_content_type: unknown
|
|
||||||
|
|
||||||
# ── Preprocess tests ───────────────────────────────────────────────
|
- id: "1.7"
|
||||||
|
file: email_thread.html
|
||||||
|
process: email_html
|
||||||
|
contains: "Sure, I'll handle the deploy"
|
||||||
|
excludes: "Let's plan the deploy"
|
||||||
|
|
||||||
- id: "1.5"
|
- id: "1.8"
|
||||||
description: "Email: strip HTML tags"
|
file: email_single.html
|
||||||
file: email_action.html
|
process: email_html
|
||||||
op: preprocess
|
contains: "deploy is done"
|
||||||
input_content_type: email_html
|
|
||||||
assertions:
|
|
||||||
no_html_tags: true
|
|
||||||
min_length: 50
|
|
||||||
compression_ratio_lt: 0.8
|
|
||||||
|
|
||||||
- id: "1.6"
|
- id: "1.9"
|
||||||
description: "Email: extract metadata (Subject + From)"
|
file: email_heavy.html
|
||||||
file: email_action.html
|
process: email_html
|
||||||
op: preprocess
|
no_html: true
|
||||||
input_content_type: email_html
|
min_chars: 30
|
||||||
assertions:
|
excludes: [border-collapse, font-size]
|
||||||
metadata_keys: [subject, from]
|
|
||||||
|
|
||||||
- id: "1.7"
|
- id: "1.10"
|
||||||
description: "Email: split thread — solo ultimo messaggio"
|
file: fallback.txt
|
||||||
file: email_thread.html
|
process: unknown
|
||||||
op: preprocess
|
min_chars: 1
|
||||||
input_content_type: email_html
|
content_type: unknown
|
||||||
assertions:
|
|
||||||
contains: "Sure, I'll handle the deploy"
|
|
||||||
not_contains: "Let's plan the deploy"
|
|
||||||
|
|
||||||
- id: "1.8"
|
|
||||||
description: "Email: singolo messaggio senza thread"
|
|
||||||
file: email_single.html
|
|
||||||
op: preprocess
|
|
||||||
input_content_type: email_html
|
|
||||||
assertions:
|
|
||||||
contains: "deploy is done"
|
|
||||||
|
|
||||||
- id: "1.9"
|
|
||||||
description: "Email: HTML pesante con table layout"
|
|
||||||
file: email_heavy.html
|
|
||||||
op: preprocess
|
|
||||||
input_content_type: email_html
|
|
||||||
assertions:
|
|
||||||
no_html_tags: true
|
|
||||||
min_length: 30
|
|
||||||
not_contains:
|
|
||||||
- "border-collapse"
|
|
||||||
- "font-size"
|
|
||||||
|
|
||||||
- id: "1.10"
|
|
||||||
description: "Fallback: file sconosciuto → testo restituito"
|
|
||||||
file: fallback.txt
|
|
||||||
op: preprocess
|
|
||||||
input_content_type: unknown
|
|
||||||
assertions:
|
|
||||||
min_length: 1
|
|
||||||
content_type: unknown
|
|
||||||
|
|||||||
@@ -1,26 +1,15 @@
|
|||||||
"""Tests for the preprocessor system (Step 1 — Local Agent V2).
|
"""Tests for the preprocessor system (Step 1 — Local Agent V2).
|
||||||
|
|
||||||
Fixtures are driven by:
|
Fixtures: tests/fixtures/preprocessors/cases.yaml + data/
|
||||||
tests/fixtures/preprocessors/cases.yaml — test case definitions
|
|
||||||
tests/fixtures/preprocessors/data/ — input files (HTML, txt, ...)
|
|
||||||
|
|
||||||
Run:
|
Run:
|
||||||
pytest tests/test_preprocessors.py -v
|
pytest tests/test_preprocessors.py -v
|
||||||
|
|
||||||
# Only detection tests
|
|
||||||
pytest tests/test_preprocessors.py -v -k detect
|
|
||||||
|
|
||||||
# Only preprocess tests
|
|
||||||
pytest tests/test_preprocessors.py -v -k preprocess
|
|
||||||
|
|
||||||
Langfuse scores are sent when LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY are set.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import yaml
|
import yaml
|
||||||
@@ -28,144 +17,77 @@ import yaml
|
|||||||
from app.core.langfuse_client import get_langfuse
|
from app.core.langfuse_client import get_langfuse
|
||||||
from app.core.preprocessors import detect_content_type, preprocess
|
from app.core.preprocessors import detect_content_type, preprocess
|
||||||
|
|
||||||
# ── Paths ──────────────────────────────────────────────────────────────
|
_DATA_DIR = Path(__file__).parent / "fixtures" / "preprocessors" / "data"
|
||||||
|
_CASES_FILE = Path(__file__).parent / "fixtures" / "preprocessors" / "cases.yaml"
|
||||||
|
|
||||||
_FIXTURES_DIR = Path(__file__).parent / "fixtures" / "preprocessors"
|
_GENERATORS = {
|
||||||
_DATA_DIR = _FIXTURES_DIR / "data"
|
|
||||||
_CASES_FILE = _FIXTURES_DIR / "cases.yaml"
|
|
||||||
|
|
||||||
# ── Content generators ─────────────────────────────────────────────────
|
|
||||||
|
|
||||||
_GENERATORS: dict[str, str] = {
|
|
||||||
# High ratio of non-printable chars → triggers "unknown" heuristic
|
|
||||||
"binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20,
|
"binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _load_cases() -> list[dict]:
|
def _cases():
|
||||||
with _CASES_FILE.open(encoding="utf-8") as f:
|
return yaml.safe_load(_CASES_FILE.read_text(encoding="utf-8"))
|
||||||
return yaml.safe_load(f)["cases"]
|
|
||||||
|
|
||||||
|
|
||||||
def _read_content(case: dict) -> str:
|
def _content(case: dict) -> str:
|
||||||
if "generate" in case:
|
if "generate" in case:
|
||||||
key = case["generate"]
|
return _GENERATORS[case["generate"]]
|
||||||
if key not in _GENERATORS:
|
return (_DATA_DIR / case["file"]).read_text(encoding="utf-8")
|
||||||
raise ValueError(f"Unknown generator '{key}' in case {case['id']}")
|
|
||||||
return _GENERATORS[key]
|
|
||||||
file_path = _DATA_DIR / case["file"]
|
|
||||||
return file_path.read_text(encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
# ── Langfuse helper ───────────────────────────────────────────────────
|
def _lf_score(name: str, value: float, comment: str = "") -> None:
|
||||||
|
|
||||||
def _lf_score(score_name: str, value: float, comment: str = "") -> None:
|
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
if lf:
|
if lf:
|
||||||
trace = lf.trace(name=f"eval-{score_name}")
|
trace = lf.trace(name=f"eval-{name}")
|
||||||
lf.score(
|
lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC", comment=comment)
|
||||||
trace_id=trace.id,
|
|
||||||
name=score_name,
|
|
||||||
value=value,
|
|
||||||
data_type="NUMERIC",
|
|
||||||
comment=comment,
|
|
||||||
)
|
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
|
|
||||||
# ── Assertion engine ──────────────────────────────────────────────────
|
# ── detect ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def _run_assertions(assertions: dict[str, Any], result: Any, raw: str) -> list[str]:
|
_detect = [c for c in _cases() if "detect" in c]
|
||||||
"""Run all assertions declared in the YAML case. Returns failure messages."""
|
|
||||||
failures: list[str] = []
|
|
||||||
|
|
||||||
if assertions.get("no_html_tags"):
|
|
||||||
if re.search(r"<[^>]+>", result.clean_text):
|
|
||||||
failures.append("clean_text still contains HTML tags")
|
|
||||||
|
|
||||||
min_len = assertions.get("min_length")
|
|
||||||
if min_len is not None:
|
|
||||||
if len(result.clean_text) < min_len:
|
|
||||||
failures.append(
|
|
||||||
f"clean_text too short: {len(result.clean_text)} < {min_len}"
|
|
||||||
)
|
|
||||||
|
|
||||||
ratio_lt = assertions.get("compression_ratio_lt")
|
|
||||||
if ratio_lt is not None and len(raw) > 0:
|
|
||||||
ratio = len(result.clean_text) / len(raw)
|
|
||||||
if ratio >= ratio_lt:
|
|
||||||
failures.append(f"compression ratio {ratio:.2f} >= {ratio_lt}")
|
|
||||||
|
|
||||||
meta_keys = assertions.get("metadata_keys", [])
|
|
||||||
for key in meta_keys:
|
|
||||||
if not result.metadata.get(key):
|
|
||||||
failures.append(f"metadata missing key '{key}' (got {result.metadata})")
|
|
||||||
|
|
||||||
contains = assertions.get("contains")
|
|
||||||
if contains:
|
|
||||||
items = [contains] if isinstance(contains, str) else contains
|
|
||||||
for item in items:
|
|
||||||
if item not in result.clean_text:
|
|
||||||
failures.append(f"clean_text missing expected substring: {item!r}")
|
|
||||||
|
|
||||||
not_contains = assertions.get("not_contains")
|
|
||||||
if not_contains:
|
|
||||||
items = [not_contains] if isinstance(not_contains, str) else not_contains
|
|
||||||
for item in items:
|
|
||||||
if item in result.clean_text:
|
|
||||||
failures.append(f"clean_text contains forbidden substring: {item!r}")
|
|
||||||
|
|
||||||
expected_ct = assertions.get("content_type")
|
|
||||||
if expected_ct and result.content_type != expected_ct:
|
|
||||||
failures.append(
|
|
||||||
f"content_type mismatch: expected {expected_ct!r}, got {result.content_type!r}"
|
|
||||||
)
|
|
||||||
|
|
||||||
return failures
|
|
||||||
|
|
||||||
|
|
||||||
# ── Parametrized: detect ──────────────────────────────────────────────
|
@pytest.mark.parametrize("case", _detect, ids=[c["id"] for c in _detect])
|
||||||
|
|
||||||
_detect_cases = [c for c in _load_cases() if c["op"] == "detect"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"case",
|
|
||||||
_detect_cases,
|
|
||||||
ids=[c["id"] for c in _detect_cases],
|
|
||||||
)
|
|
||||||
def test_detect(case: dict) -> None:
|
def test_detect(case: dict) -> None:
|
||||||
raw = _read_content(case)
|
raw = _content(case)
|
||||||
ct = detect_content_type(case["input_filename"], raw)
|
filename = case.get("filename", case.get("file", ""))
|
||||||
|
ct = detect_content_type(filename, raw)
|
||||||
expected = case["expected_content_type"]
|
expected = case["detect"]
|
||||||
score = 1.0 if ct == expected else 0.0
|
_lf_score(f"preprocess.detect.{case['id']}", 1.0 if ct == expected else 0.0)
|
||||||
_lf_score(case["score_name"], score, f"got={ct}, expected={expected}")
|
assert ct == expected, f"[{case['id']}] expected {expected!r}, got {ct!r}"
|
||||||
|
|
||||||
assert ct == expected, (
|
|
||||||
f"[{case['id']}] {case['description']}: "
|
|
||||||
f"expected content_type={expected!r}, got {ct!r}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ── Parametrized: preprocess ──────────────────────────────────────────
|
# ── preprocess ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_preprocess_cases = [c for c in _load_cases() if c["op"] == "preprocess"]
|
_process = [c for c in _cases() if "process" in c]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize("case", _process, ids=[c["id"] for c in _process])
|
||||||
"case",
|
|
||||||
_preprocess_cases,
|
|
||||||
ids=[c["id"] for c in _preprocess_cases],
|
|
||||||
)
|
|
||||||
def test_preprocess(case: dict) -> None:
|
def test_preprocess(case: dict) -> None:
|
||||||
raw = _read_content(case)
|
raw = _content(case)
|
||||||
result = preprocess(case["input_content_type"], raw)
|
result = preprocess(case["process"], raw)
|
||||||
|
|
||||||
assertions = case.get("assertions", {})
|
if case.get("no_html"):
|
||||||
failures = _run_assertions(assertions, result, raw)
|
assert not re.search(r"<[^>]+>", result.clean_text), "clean_text contains HTML tags"
|
||||||
|
|
||||||
assert not failures, (
|
if "min_chars" in case:
|
||||||
f"[{case['id']}] {case['description']} — {len(failures)} assertion(s) failed:\n"
|
assert len(result.clean_text) >= case["min_chars"], \
|
||||||
+ "\n".join(f" • {f}" for f in failures)
|
f"clean_text too short: {len(result.clean_text)} < {case['min_chars']}"
|
||||||
)
|
|
||||||
|
if "ratio_lt" in case:
|
||||||
|
ratio = len(result.clean_text) / len(raw)
|
||||||
|
assert ratio < case["ratio_lt"], f"compression ratio {ratio:.2f} >= {case['ratio_lt']}"
|
||||||
|
|
||||||
|
for key in case.get("has_meta", []):
|
||||||
|
assert result.metadata.get(key), f"metadata missing {key!r} (got {result.metadata})"
|
||||||
|
|
||||||
|
for item in ([case["contains"]] if isinstance(case.get("contains"), str) else case.get("contains", [])):
|
||||||
|
assert item in result.clean_text, f"clean_text missing {item!r}"
|
||||||
|
|
||||||
|
for item in ([case["excludes"]] if isinstance(case.get("excludes"), str) else case.get("excludes", [])):
|
||||||
|
assert item not in result.clean_text, f"clean_text contains forbidden {item!r}"
|
||||||
|
|
||||||
|
if "content_type" in case:
|
||||||
|
assert result.content_type == case["content_type"], \
|
||||||
|
f"expected content_type {case['content_type']!r}, got {result.content_type!r}"
|
||||||
|
|||||||
Reference in New Issue
Block a user