Merge branch 'main' into remove-deprecated-completion

2025-10-04 12:07:34 +00:00 · 2025-10-01 08:56:33 -04:00 · 2025-10-01 08:56:33 -04:00 · 0a0e02a0ac
commit 0a0e02a0ac
parent d6f1d32d3c ca47d90926
66 changed files with 13976 additions and 5631 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -129,13 +129,16 @@ class StreamingResponseOrchestrator:
        messages = self.ctx.messages.copy()

        while True:
+            # Text is the default response format for chat completion so don't need to pass it
+            # (some providers don't support non-empty response_format when tools are present)
+            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
            completion_result = await self.inference_api.openai_chat_completion(
                model=self.ctx.model,
                messages=messages,
                tools=self.ctx.chat_tools,
                stream=True,
                temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=response_format,
            )

            # Process streaming chunks and build complete response
@ -352,8 +355,11 @@ class StreamingResponseOrchestrator:

        # Emit arguments.done events for completed tool calls (differentiate between MCP and function calls)
        for tool_call_index in sorted(chat_response_tool_calls.keys()):
+            tool_call = chat_response_tool_calls[tool_call_index]
+            # Ensure that arguments, if sent back to the inference provider, are not None
+            tool_call.function.arguments = tool_call.function.arguments or "{}"
            tool_call_item_id = tool_call_item_ids[tool_call_index]
-            final_arguments = chat_response_tool_calls[tool_call_index].function.arguments or ""
+            final_arguments = tool_call.function.arguments
            tool_call_name = chat_response_tool_calls[tool_call_index].function.name

            # Check if this is an MCP tool call
--- a/llama_stack/providers/inline/telemetry/meta_reference/config.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py
@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel):
        description="The service name to use for telemetry",
    )
    sinks: list[TelemetrySink] = Field(
-        default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE],
+        default=[TelemetrySink.SQLITE],
        description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console)",
    )
    sqlite_db_path: str = Field(
@ -49,7 +49,7 @@ class TelemetryConfig(BaseModel):
    def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
        return {
            "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
-            "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
+            "sinks": "${env.TELEMETRY_SINKS:=sqlite}",
            "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
            "otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}",
        }