diff --git a/README.md b/README.md index e69de29..2565106 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,5 @@ +## DEV +Run in DEV with command: +``` +uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --log-config logging.conf +``` \ No newline at end of file diff --git a/app/core/deep_agent.py b/app/core/deep_agent.py index 4f071a8..5f528f1 100644 --- a/app/core/deep_agent.py +++ b/app/core/deep_agent.py @@ -858,25 +858,15 @@ async def _run_single_agent_stream( _gen.update(output=_as_text(response.content), usage_details=extract_usage(response)) _gen_ctx.__exit__(None, None, None) - messages.append(response) - if not response.tool_calls: - emitted_any = False - async for chunk in llm.astream(messages): - token = _as_text(getattr(chunk, "content", "")) - if token: - streamed_chars += len(token) - streamed_text.append(token) - emitted_any = True - yield "token", token - - # Some providers return final text in `response.content` but stream no chunks. - if not emitted_any: - fallback_text = _as_text(response.content) - if fallback_text: - streamed_chars += len(fallback_text) - streamed_text.append(fallback_text) - yield "token", fallback_text + # Yield the content from the ainvoke response directly — no second LLM call. + # Previously, messages.append(response) was called first, so the re-stream + # received [System, Human, AI] and regenerated a response without tools bound. + final_text = _as_text(response.content) + if final_text: + streamed_chars += len(final_text) + streamed_text.append(final_text) + yield "token", final_text logger.info( "deep_agent: run_single_agent_stream_end trace=%s user=%s tool_calls=%d response_chars=%d", trace_id or "-", @@ -888,6 +878,7 @@ async def _run_single_agent_stream( _span.update(output="".join(streamed_text)) return + messages.append(response) tool_map = {tool_def.name: tool_def for tool_def in tools} for call in response.tool_calls: tool_calls_count += 1