Files
api/tests/fixtures/preprocessors/cases.yaml
Roberto Musso da282229ff refactor(tests): remove redundant filename field
file: serve sia come path da leggere che come nome passato a detect_content_type.
Non c'è motivo di averli separati.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 14:13:14 +02:00

69 lines
1.6 KiB
YAML

# Preprocessor test cases
#
# detect: <expected_type> → chiama detect_content_type(filename, content)
# process: <content_type> → chiama preprocess(content_type, content)
#
# Sorgente: file: <nome in data/> oppure generate: binary_noise
#
# Assertions piatte (solo per process):
# no_html: true clean_text senza tag HTML
# min_chars: N len(clean_text) >= N
# ratio_lt: F len(clean) / len(raw) < F
# has_meta: [k, ...] chiavi presenti in metadata
# contains: str | [str] substring(s) presenti in clean_text
# excludes: str | [str] substring(s) assenti da clean_text
# content_type: str result.content_type == questo valore
- id: "1.1"
file: email_action.html
detect: email_html
- id: "1.2"
file: generic_page.html
detect: generic_html
- id: "1.3"
file: notes.txt
detect: plain_text
- id: "1.4"
file: archive.xyz
generate: binary_noise
detect: unknown
- id: "1.5"
file: email_action.html
process: email_html
no_html: true
min_chars: 50
ratio_lt: 0.8
- id: "1.6"
file: email_action.html
process: email_html
has_meta: [subject, from]
- id: "1.7"
file: email_thread.html
process: email_html
contains: "Sure, I'll handle the deploy"
excludes: "Let's plan the deploy"
- id: "1.8"
file: email_single.html
process: email_html
contains: "deploy is done"
- id: "1.9"
file: email_heavy.html
process: email_html
no_html: true
min_chars: 30
excludes: [border-collapse, font-size]
- id: "1.10"
file: fallback.txt
process: unknown
min_chars: 1
content_type: unknown