diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 737a384d8..7e80a067f 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -573,6 +573,12 @@ class InferenceRouter(Inference): for tool in tools: TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool) + # Some providers make tool calls even when tool_choice is "none" + # so just clear them both out to avoid unexpected tool calls + if tool_choice == "none" and tools is not None: + tool_choice = None + tools = None + params = dict( model=model_obj.identifier, messages=messages, @@ -600,7 +606,19 @@ class InferenceRouter(Inference): ) provider = self.routing_table.get_provider_impl(model_obj.identifier) - return await provider.openai_chat_completion(**params) + if stream: + return await provider.openai_chat_completion(**params) + else: + return await self._nonstream_openai_chat_completion(provider, params) + + async def _nonstream_openai_chat_completion(self, provider: Inference, params: dict) -> OpenAIChatCompletion: + response = await provider.openai_chat_completion(**params) + for choice in response.choices: + # some providers return an empty list for no tool calls in non-streaming responses + # but the OpenAI API returns None. So, set tool_calls to None if it's empty + if choice.message and choice.message.tool_calls is not None and len(choice.message.tool_calls) == 0: + choice.message.tool_calls = None + return response async def health(self) -> dict[str, HealthResponse]: health_statuses = {} diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d0d45b429..32e2b17d0 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -447,12 +447,6 @@ class OllamaInferenceAdapter( user: str | None = None, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: model_obj = await self._get_model(model) - - # ollama still makes tool calls even when tool_choice is "none" - # so we need to remove the tools in that case - if tool_choice == "none" and tools is not None: - tools = None - params = { k: v for k, v in {