fix(tests): migrate eval tests to Langfuse V3 API
lf.trace() and lf.score(trace_id=...) are V2 APIs that were removed in V3.

V3 pattern:
- lf.start_as_current_observation(name=...) is used as a context manager and yields obs.
- obs.score(name=..., value=...) records the score on that observation.
- contextlib.nullcontext() is used when lf is None, so the test structure stays the same.

Updated tests 2.1–2.7 in test_agent_runner_v2.py accordingly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -340,43 +340,51 @@ async def test_2_8_items_created_count():
|
|||||||
|
|
||||||
|
|
||||||
# ── Eval: 2.1–2.7 (real LLM + Langfuse scoring) ──────────────────────────
|
# ── Eval: 2.1–2.7 (real LLM + Langfuse scoring) ──────────────────────────
|
||||||
|
#
|
||||||
|
# Langfuse V3 pattern:
|
||||||
|
# lf.start_as_current_observation(name=...) as context manager → obs object
|
||||||
|
# obs.score(name=..., value=...) (not lf.score(trace_id=...))
|
||||||
|
# contextlib.nullcontext() when lf is None → obs is None, no-op
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_1_email_to_task():
|
async def test_2_1_email_to_task():
|
||||||
"""2.1 Action email → LLM calls create_task. Score: runner.email_to_task."""
|
"""2.1 Action email → LLM calls create_task. Score: runner.email_to_task."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(
|
|
||||||
name="eval-runner-2.1-email-to-task",
|
|
||||||
metadata={"step": "2"},
|
|
||||||
) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/ProjectAlpha_action.html",
|
file_path="/emails/ProjectAlpha_action.html",
|
||||||
file_content=_ACTION_EMAIL,
|
file_content=_ACTION_EMAIL,
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
|
name="eval-runner-2.1-email-to-task", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
_, kwargs = mock_fin.call_args
|
with obs_ctx as obs:
|
||||||
task_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "tasks"]
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
score = 1.0 if len(task_creates) >= 1 else 0.0
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
|
||||||
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
|
|
||||||
if lf and trace:
|
_, kwargs = mock_fin.call_args
|
||||||
lf.score(
|
task_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "tasks"]
|
||||||
trace_id=trace.id,
|
score = 1.0 if len(task_creates) >= 1 else 0.0
|
||||||
name="runner.email_to_task",
|
|
||||||
value=score,
|
if obs is not None:
|
||||||
comment=f"task_creates={len(task_creates)} items_created={kwargs.get('items_created')}",
|
obs.score(
|
||||||
)
|
name="runner.email_to_task",
|
||||||
|
value=score,
|
||||||
|
comment=f"task_creates={len(task_creates)} items_created={kwargs.get('items_created')}",
|
||||||
|
)
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, f"Expected at least 1 task created, got {len(task_creates)}"
|
assert score == 1.0, f"Expected at least 1 task created, got {len(task_creates)}"
|
||||||
@@ -386,29 +394,35 @@ async def test_2_1_email_to_task():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_2_email_to_note():
|
async def test_2_2_email_to_note():
|
||||||
"""2.2 Informational email → LLM calls create_note. Score: runner.email_to_note."""
|
"""2.2 Informational email → LLM calls create_note. Score: runner.email_to_note."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.2-email-to-note", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/ProjectAlpha_info.html",
|
file_path="/emails/ProjectAlpha_info.html",
|
||||||
file_content=_INFO_EMAIL,
|
file_content=_INFO_EMAIL,
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
name="eval-runner-2.2-email-to-note", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
note_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "notes"]
|
with obs_ctx as obs:
|
||||||
score = 1.0 if len(note_creates) >= 1 else 0.0
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
|
|
||||||
if lf and trace:
|
note_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "notes"]
|
||||||
lf.score(trace_id=trace.id, name="runner.email_to_note", value=score,
|
score = 1.0 if len(note_creates) >= 1 else 0.0
|
||||||
comment=f"note_creates={len(note_creates)}")
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.email_to_note", value=score,
|
||||||
|
comment=f"note_creates={len(note_creates)}")
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, f"Expected at least 1 note created, got {len(note_creates)}"
|
assert score == 1.0, f"Expected at least 1 note created, got {len(note_creates)}"
|
||||||
@@ -418,29 +432,35 @@ async def test_2_2_email_to_note():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_3_email_to_timeline():
|
async def test_2_3_email_to_timeline():
|
||||||
"""2.3 Email with event date → LLM calls create_timeline. Score: runner.email_to_timeline."""
|
"""2.3 Email with event date → LLM calls create_timeline. Score: runner.email_to_timeline."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.3-email-to-timeline", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/ProjectAlpha_kickoff.html",
|
file_path="/emails/ProjectAlpha_kickoff.html",
|
||||||
file_content=_DATE_EMAIL,
|
file_content=_DATE_EMAIL,
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
name="eval-runner-2.3-email-to-timeline", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
tl_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "timelines"]
|
with obs_ctx as obs:
|
||||||
score = 1.0 if len(tl_creates) >= 1 else 0.0
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
|
|
||||||
if lf and trace:
|
tl_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "timelines"]
|
||||||
lf.score(trace_id=trace.id, name="runner.email_to_timeline", value=score,
|
score = 1.0 if len(tl_creates) >= 1 else 0.0
|
||||||
comment=f"timeline_creates={len(tl_creates)}")
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.email_to_timeline", value=score,
|
||||||
|
comment=f"timeline_creates={len(tl_creates)}")
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, f"Expected at least 1 timeline created, got {len(tl_creates)}"
|
assert score == 1.0, f"Expected at least 1 timeline created, got {len(tl_creates)}"
|
||||||
@@ -450,33 +470,37 @@ async def test_2_3_email_to_timeline():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_4_project_matching_filename():
|
async def test_2_4_project_matching_filename():
|
||||||
"""2.4 Filename contains 'ProjectAlpha' → LLM assigns to proj-alpha. Score: runner.project_filename."""
|
"""2.4 Filename contains 'ProjectAlpha' → LLM assigns to proj-alpha. Score: runner.project_filename."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.4-project-filename", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/ProjectAlpha_report.html",
|
file_path="/emails/ProjectAlpha_report.html",
|
||||||
file_content=_ACTION_EMAIL,
|
file_content=_ACTION_EMAIL,
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
name="eval-runner-2.4-project-filename", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
# Check that project_id = proj-alpha was used in any insert
|
with obs_ctx as obs:
|
||||||
inserts = [c for c in calls if c["action"] == "insert"]
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
correct_project = any(
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
c.get("data", {}).get("projectId") == "proj-alpha"
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
for c in inserts
|
|
||||||
)
|
|
||||||
score = 1.0 if correct_project else 0.0
|
|
||||||
|
|
||||||
if lf and trace:
|
inserts = [c for c in calls if c["action"] == "insert"]
|
||||||
lf.score(trace_id=trace.id, name="runner.project_filename", value=score)
|
correct_project = any(
|
||||||
|
c.get("data", {}).get("projectId") == "proj-alpha" for c in inserts
|
||||||
|
)
|
||||||
|
score = 1.0 if correct_project else 0.0
|
||||||
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.project_filename", value=score)
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, "Expected inserts to use proj-alpha based on filename"
|
assert score == 1.0, "Expected inserts to use proj-alpha based on filename"
|
||||||
@@ -486,32 +510,37 @@ async def test_2_4_project_matching_filename():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_5_project_matching_content():
|
async def test_2_5_project_matching_content():
|
||||||
"""2.5 Email body mentions 'Project Alpha' → correct project assigned. Score: runner.project_content."""
|
"""2.5 Email body mentions 'Project Alpha' → correct project assigned. Score: runner.project_content."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.5-project-content", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/email_001.html", # generic filename, no project hint
|
file_path="/emails/email_001.html", # generic filename, no project hint
|
||||||
file_content=_ACTION_EMAIL, # body mentions "Project Alpha"
|
file_content=_ACTION_EMAIL, # body mentions "Project Alpha"
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
name="eval-runner-2.5-project-content", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
inserts = [c for c in calls if c["action"] == "insert"]
|
with obs_ctx as obs:
|
||||||
correct_project = any(
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
c.get("data", {}).get("projectId") == "proj-alpha"
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
for c in inserts
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
)
|
|
||||||
score = 1.0 if correct_project else 0.0
|
|
||||||
|
|
||||||
if lf and trace:
|
inserts = [c for c in calls if c["action"] == "insert"]
|
||||||
lf.score(trace_id=trace.id, name="runner.project_content", value=score)
|
correct_project = any(
|
||||||
|
c.get("data", {}).get("projectId") == "proj-alpha" for c in inserts
|
||||||
|
)
|
||||||
|
score = 1.0 if correct_project else 0.0
|
||||||
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.project_content", value=score)
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, "Expected inserts to use proj-alpha based on email body content"
|
assert score == 1.0, "Expected inserts to use proj-alpha based on email body content"
|
||||||
@@ -521,30 +550,35 @@ async def test_2_5_project_matching_content():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_6_no_project_match_global_rule():
|
async def test_2_6_no_project_match_global_rule():
|
||||||
"""2.6 Newsletter email + global rule 'no project = no entities' → no creates. Score: runner.no_project."""
|
"""2.6 Newsletter email + global rule 'no project = no entities' → no creates. Score: runner.no_project."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.6-no-project", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/newsletter.html",
|
file_path="/emails/newsletter.html",
|
||||||
file_content=_NO_PROJECT_EMAIL,
|
file_content=_NO_PROJECT_EMAIL,
|
||||||
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
projects=[_PROJECT_ALPHA, _PROJECT_BETA],
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
|
name="eval-runner-2.6-no-project", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
_, kwargs = mock_fin.call_args
|
with obs_ctx as obs:
|
||||||
inserts = [c for c in calls if c["action"] == "insert"]
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
score = 1.0 if len(inserts) == 0 else 0.0
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
|
|
||||||
if lf and trace:
|
inserts = [c for c in calls if c["action"] == "insert"]
|
||||||
lf.score(trace_id=trace.id, name="runner.no_project", value=score,
|
score = 1.0 if len(inserts) == 0 else 0.0
|
||||||
comment=f"inserts={len(inserts)}")
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.no_project", value=score,
|
||||||
|
comment=f"inserts={len(inserts)}")
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, f"Expected 0 inserts for unmatched newsletter, got {len(inserts)}"
|
assert score == 1.0, f"Expected 0 inserts for unmatched newsletter, got {len(inserts)}"
|
||||||
@@ -554,32 +588,37 @@ async def test_2_6_no_project_match_global_rule():
|
|||||||
@pytest.mark.eval
|
@pytest.mark.eval
|
||||||
async def test_2_7_deduplication():
|
async def test_2_7_deduplication():
|
||||||
"""2.7 Existing task with same title → LLM calls update_task, not create_task. Score: runner.dedup."""
|
"""2.7 Existing task with same title → LLM calls update_task, not create_task. Score: runner.dedup."""
|
||||||
|
from contextlib import nullcontext
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
trace = lf.trace(name="eval-runner-2.7-dedup", metadata={"step": "2"}) if lf else None
|
|
||||||
|
|
||||||
config = _make_config()
|
config = _make_config()
|
||||||
run_log = _make_run_log(config.id)
|
run_log = _make_run_log(config.id)
|
||||||
mgr = _make_manager()
|
mgr = _make_manager()
|
||||||
|
|
||||||
executor, calls = _make_executor(
|
executor, calls = _make_executor(
|
||||||
file_path="/emails/ProjectAlpha_followup.html",
|
file_path="/emails/ProjectAlpha_followup.html",
|
||||||
file_content=_ACTION_EMAIL, # "Fix the login bug" — already exists
|
file_content=_ACTION_EMAIL, # "Fix the login bug" — already exists
|
||||||
projects=[_PROJECT_ALPHA],
|
projects=[_PROJECT_ALPHA],
|
||||||
existing_tasks=[_EXISTING_TASK], # task already exists
|
existing_tasks=[_EXISTING_TASK], # task already exists
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
obs_ctx = lf.start_as_current_observation(
|
||||||
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
name="eval-runner-2.7-dedup", metadata={"step": "2"}
|
||||||
await run_local_agent(_USER_ID, config, run_log, mgr)
|
) if lf else nullcontext()
|
||||||
|
|
||||||
task_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "tasks"]
|
with obs_ctx as obs:
|
||||||
task_updates = [c for c in calls if c["action"] == "update" and c.get("table") == "tasks"]
|
with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
|
||||||
# Prefer update over create
|
patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
|
||||||
score = 1.0 if len(task_creates) == 0 or len(task_updates) >= 1 else 0.0
|
await run_local_agent(_USER_ID, config, run_log, mgr)
|
||||||
|
|
||||||
if lf and trace:
|
task_creates = [c for c in calls if c["action"] == "insert" and c["table"] == "tasks"]
|
||||||
lf.score(trace_id=trace.id, name="runner.dedup", value=score,
|
task_updates = [c for c in calls if c["action"] == "update" and c.get("table") == "tasks"]
|
||||||
comment=f"creates={len(task_creates)} updates={len(task_updates)}")
|
score = 1.0 if len(task_creates) == 0 or len(task_updates) >= 1 else 0.0
|
||||||
|
|
||||||
|
if obs is not None:
|
||||||
|
obs.score(name="runner.dedup", value=score,
|
||||||
|
comment=f"creates={len(task_creates)} updates={len(task_updates)}")
|
||||||
|
|
||||||
|
if lf:
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
assert score == 1.0, (
|
assert score == 1.0, (
|
||||||
|
|||||||
Reference in New Issue
Block a user