Merge a62fcb9acd into de692162af

2025-08-15 14:08:00 +00:00 · 2025-08-14 10:04:57 -04:00 · 2025-08-14 10:04:57 -04:00 · 40ef454abd
commit 40ef454abd
parent de692162af a62fcb9acd
1 changed file with 58 additions and 9 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -68,6 +68,11 @@ from llama_stack.models.llama.datatypes import (
    BuiltinTool,
    ToolCall,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
    convert_message_to_openai_dict,
    convert_openai_chat_completion_stream,
    convert_tooldef_to_openai_tool,
 )
 from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack.providers.utils.telemetry import tracing
@@ -510,16 +515,60 @@ class ChatAgent(ShieldRunnerMixin):
            async with tracing.span("inference") as span:
                if self.agent_config.name:
                    span.set_attribute("agent_name", self.agent_config.name)
-                async for chunk in await self.inference_api.chat_completion(
+                # Convert messages to OpenAI format
-                    self.agent_config.model,
+                openai_messages = []
-                    input_messages,
+                for message in input_messages:
-                    tools=self.tool_defs,
+                    openai_message = await convert_message_to_openai_dict(message)
-                    tool_prompt_format=self.agent_config.tool_config.tool_prompt_format,
+                    openai_messages.append(openai_message)
-                    response_format=self.agent_config.response_format,
+
                # Convert tool definitions to OpenAI format
                openai_tools = None
                if self.tool_defs:
                    openai_tools = []
                    for tool_def in self.tool_defs:
                        openai_tool = convert_tooldef_to_openai_tool(tool_def)
                        openai_tools.append(openai_tool)
                # Extract tool_choice from tool_config for OpenAI compatibility
                # Note: tool_choice can only be provided when tools are also provided
                tool_choice = None
                if openai_tools and self.agent_config.tool_config and self.agent_config.tool_config.tool_choice:
                    tool_choice = (
                        self.agent_config.tool_config.tool_choice.value
                        if hasattr(self.agent_config.tool_config.tool_choice, "value")
                        else str(self.agent_config.tool_config.tool_choice)
                    )
                # Convert sampling params to OpenAI format (temperature, top_p, max_tokens)
                temperature = None
                top_p = None
                max_tokens = None
                if sampling_params:
                    if hasattr(sampling_params.strategy, "temperature"):
                        temperature = sampling_params.strategy.temperature
                    if hasattr(sampling_params.strategy, "top_p"):
                        top_p = sampling_params.strategy.top_p
                    if sampling_params.max_tokens:
                        max_tokens = sampling_params.max_tokens
                # Use OpenAI chat completion
                openai_stream = await self.inference_api.openai_chat_completion(
                    model=self.agent_config.model,
                    messages=openai_messages,
                    tools=openai_tools if openai_tools else None,
                    tool_choice=tool_choice,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens=max_tokens,
                    stream=True,
-                    sampling_params=sampling_params,
+                )
-                    tool_config=self.agent_config.tool_config,
+
-                ):
+                # Convert OpenAI stream back to Llama Stack format
                response_stream = convert_openai_chat_completion_stream(
                    openai_stream, enable_incremental_tool_calls=True
                )
                async for chunk in response_stream:
                    event = chunk.event
                    if event.event_type == ChatCompletionResponseEventType.start:
                        continue