Compare commits
2 Commits
d5fea95561
...
2c7cac9e03
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c7cac9e03 | ||
|
|
ea9094f47f |
@@ -21,6 +21,8 @@ OPENAI_API_KEY=
|
|||||||
ANTHROPIC_API_KEY=
|
ANTHROPIC_API_KEY=
|
||||||
GOOGLE_API_KEY=
|
GOOGLE_API_KEY=
|
||||||
CEREBRAS_API_KEY=
|
CEREBRAS_API_KEY=
|
||||||
|
GROQ_API_KEY=
|
||||||
|
DEEPSEEK_API_KEY=
|
||||||
|
|
||||||
# Default model used by any agent that does not have a specific override below.
|
# Default model used by any agent that does not have a specific override below.
|
||||||
LLM_MODEL=gpt-5-mini
|
LLM_MODEL=gpt-5-mini
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
## DEV
|
||||||
|
Run the app in development mode with the following command:
|
||||||
|
```
|
||||||
|
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --log-config logging.conf
|
||||||
|
```
|
||||||
@@ -16,6 +16,8 @@ class Settings(BaseSettings):
|
|||||||
ANTHROPIC_API_KEY: str = ""
|
ANTHROPIC_API_KEY: str = ""
|
||||||
GOOGLE_API_KEY: str = ""
|
GOOGLE_API_KEY: str = ""
|
||||||
CEREBRAS_API_KEY: str = ""
|
CEREBRAS_API_KEY: str = ""
|
||||||
|
GROQ_API_KEY: str = ""
|
||||||
|
DEEPSEEK_API_KEY: str = ""
|
||||||
|
|
||||||
LLM_MODEL: str = "gpt-4o"
|
LLM_MODEL: str = "gpt-4o"
|
||||||
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
||||||
|
|||||||
@@ -858,25 +858,15 @@ async def _run_single_agent_stream(
|
|||||||
_gen.update(output=_as_text(response.content), usage_details=extract_usage(response))
|
_gen.update(output=_as_text(response.content), usage_details=extract_usage(response))
|
||||||
_gen_ctx.__exit__(None, None, None)
|
_gen_ctx.__exit__(None, None, None)
|
||||||
|
|
||||||
messages.append(response)
|
|
||||||
|
|
||||||
if not response.tool_calls:
|
if not response.tool_calls:
|
||||||
emitted_any = False
|
# Yield the content from the ainvoke response directly — no second LLM call.
|
||||||
async for chunk in llm.astream(messages):
|
# Previously, messages.append(response) was called first, so the re-stream
|
||||||
token = _as_text(getattr(chunk, "content", ""))
|
# received [System, Human, AI] and regenerated a response without tools bound.
|
||||||
if token:
|
final_text = _as_text(response.content)
|
||||||
streamed_chars += len(token)
|
if final_text:
|
||||||
streamed_text.append(token)
|
streamed_chars += len(final_text)
|
||||||
emitted_any = True
|
streamed_text.append(final_text)
|
||||||
yield "token", token
|
yield "token", final_text
|
||||||
|
|
||||||
# Some providers return final text in `response.content` but stream no chunks.
|
|
||||||
if not emitted_any:
|
|
||||||
fallback_text = _as_text(response.content)
|
|
||||||
if fallback_text:
|
|
||||||
streamed_chars += len(fallback_text)
|
|
||||||
streamed_text.append(fallback_text)
|
|
||||||
yield "token", fallback_text
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"deep_agent: run_single_agent_stream_end trace=%s user=%s tool_calls=%d response_chars=%d",
|
"deep_agent: run_single_agent_stream_end trace=%s user=%s tool_calls=%d response_chars=%d",
|
||||||
trace_id or "-",
|
trace_id or "-",
|
||||||
@@ -888,6 +878,7 @@ async def _run_single_agent_stream(
|
|||||||
_span.update(output="".join(streamed_text))
|
_span.update(output="".join(streamed_text))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
messages.append(response)
|
||||||
tool_map = {tool_def.name: tool_def for tool_def in tools}
|
tool_map = {tool_def.name: tool_def for tool_def in tools}
|
||||||
for call in response.tool_calls:
|
for call in response.tool_calls:
|
||||||
tool_calls_count += 1
|
tool_calls_count += 1
|
||||||
|
|||||||
@@ -51,6 +51,10 @@ def _api_key_for_model(model: str) -> str | None:
|
|||||||
return settings.GOOGLE_API_KEY or None
|
return settings.GOOGLE_API_KEY or None
|
||||||
if model.startswith("cerebras/"):
|
if model.startswith("cerebras/"):
|
||||||
return settings.CEREBRAS_API_KEY or None
|
return settings.CEREBRAS_API_KEY or None
|
||||||
|
if model.startswith("groq/"):
|
||||||
|
return settings.GROQ_API_KEY or None
|
||||||
|
if model.startswith("deepseek/"):
|
||||||
|
return settings.DEEPSEEK_API_KEY or None
|
||||||
if model.startswith("github_copilot/"):
|
if model.startswith("github_copilot/"):
|
||||||
# GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
|
# GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
|
||||||
# No API key is required; returning None lets LiteLLM handle auth.
|
# No API key is required; returning None lets LiteLLM handle auth.
|
||||||
|
|||||||
Reference in New Issue
Block a user