[#432] Groq Provider tool call tweaks (#811)

# What does this PR do?

Follow-up to @ashwinb's comments in
https://github.com/meta-llama/llama-stack/pull/630

When Groq returns tool-call arguments that are not valid JSON, `_convert_groq_tool_call` now returns an `UnparseableToolCall` instead of raising. The non-streaming path serializes unparseable tool calls into the message content, and the streaming path emits them as a raw string with `ToolCallParseStatus.failed`.

- [x] Contributes to issue (#432)


## Test Plan
<details>
<summary>Environment</summary>

```shell
export GROQ_API_KEY=<api-key>

# Create the environment if it doesn't already exist
conda create --name llamastack-groq python=3.10
conda activate llamastack-groq

wget https://raw.githubusercontent.com/aidando73/llama-stack/9165502582cd7cb178bc1dcf89955b45768ab6c1/build.yaml
wget https://raw.githubusercontent.com/meta-llama/llama-stack/918172c7fa92522c9ebc586bdb4f386b1d9ea224/run.yaml

# Build
pip install -e . && llama stack build --config ./build.yaml --image-type conda

# Activate built environment
conda activate llamastack-groq

# Test deps
pip install pytest pytest_html pytest_asyncio
```
</details>



<details>
<summary>Unit tests</summary>

```shell
# Setup
conda activate llamastack-groq

# Run
pytest llama_stack/providers/tests/inference/groq/test_groq_utils.py -vv -k groq -s

# Result
llama_stack/providers/tests/inference/groq/test_groq_utils.py .......................

========================================= 23 passed, 11 warnings in 0.06s =========================================
```
</details>
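
For reference, a minimal sketch of the kind of unit test this path gets. The import paths (for the groq SDK types and for `groq_utils`) and the `call_123`/`get_weather` values are assumptions; this is not the PR's actual test code:

```python
# Sketch only: import paths are assumptions; adjust to where groq_utils lives.
from groq.types.chat.chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall,
    Function,
)

from llama_stack.providers.remote.inference.groq.groq_utils import (
    UnparseableToolCall,
    _convert_groq_tool_call,
)


def test_returns_unparseable_tool_call_when_arguments_are_not_json():
    # Groq occasionally returns tool-call arguments that are not valid JSON;
    # the converter should preserve them verbatim rather than raise.
    tool_call = ChatCompletionMessageToolCall(
        id="call_123",
        type="function",
        function=Function(
            name="get_weather",
            arguments='{"location": "San Francisco"',  # truncated, invalid JSON
        ),
    )

    result = _convert_groq_tool_call(tool_call)

    assert isinstance(result, UnparseableToolCall)
    assert result.call_id == "call_123"
    assert result.tool_name == "get_weather"
    assert result.arguments == '{"location": "San Francisco"'
```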

<details>
<summary>Integration tests</summary>

```shell
# Tests
pytest llama_stack/providers/tests/inference/test_text_inference.py -k groq -s

# Results
___________________________ TestInference.test_chat_completion_with_tool_calling[-groq] ___________________________
llama_stack/providers/tests/inference/test_text_inference.py:403: in test_chat_completion_with_tool_calling
    assert len(message.tool_calls) > 0
E   assert 0 > 0
E    +  where 0 = len([])
E    +    where [] = CompletionMessage(role='assistant', content='<function=get_weather>{"location": "San Francisco, CA"}', stop_reason=<StopReason.end_of_turn: 'end_of_turn'>, tool_calls=[]).tool_calls
============================================= short test summary info =============================================
FAILED llama_stack/providers/tests/inference/test_text_inference.py::TestInference::test_chat_completion_with_tool_calling[-groq] - assert 0 > 0
======================== 1 failed, 3 passed, 5 skipped, 99 deselected, 7 warnings in 2.13s ========================
```

(The one failure is expected with Llama 3.2 3B; see
https://github.com/meta-llama/llama-stack/pull/630#discussion_r1914056503)
</details>

## Sources

Please link relevant resources if necessary.


## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [x] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [x] Wrote necessary unit or integration tests.

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Diff of `groq_utils.py`:

```diff
@@ -6,7 +6,7 @@
 import json
 import warnings
-from typing import AsyncGenerator, Literal
+from typing import AsyncGenerator, Literal, Union
 
 from groq import Stream
 from groq.types.chat.chat_completion import ChatCompletion
@@ -30,6 +30,8 @@ from groq.types.shared.function_definition import FunctionDefinition
 from llama_models.llama3.api.datatypes import ToolParamDefinition
+from pydantic import BaseModel
+
 from llama_stack.apis.common.content_types import (
     TextDelta,
     ToolCallDelta,
@@ -150,15 +152,26 @@ def convert_chat_completion_response(
             _convert_groq_tool_call(tool_call)
             for tool_call in choice.message.tool_calls
         ]
-        return ChatCompletionResponse(
-            completion_message=CompletionMessage(
-                tool_calls=tool_calls,
-                stop_reason=StopReason.end_of_message,
-                # Content is not optional
-                content="",
-            ),
-            logprobs=None,
-        )
+        if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
+            # If we couldn't parse a tool call, jsonify the tool calls and return them
+            return ChatCompletionResponse(
+                completion_message=CompletionMessage(
+                    stop_reason=StopReason.end_of_message,
+                    content=json.dumps(tool_calls, default=lambda x: x.model_dump()),
+                ),
+                logprobs=None,
+            )
+        else:
+            # Otherwise, return tool calls as normal
+            return ChatCompletionResponse(
+                completion_message=CompletionMessage(
+                    tool_calls=tool_calls,
+                    stop_reason=StopReason.end_of_message,
+                    # Content is not optional
+                    content="",
+                ),
+                logprobs=None,
+            )
     else:
         return ChatCompletionResponse(
             completion_message=CompletionMessage(
@@ -214,15 +227,27 @@ async def convert_chat_completion_response_stream(
             # We assume Groq produces fully formed tool calls for each chunk
             tool_call = _convert_groq_tool_call(choice.delta.tool_calls[0])
-            yield ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=event_type,
-                    delta=ToolCallDelta(
-                        tool_call=tool_call,
-                        parse_status=ToolCallParseStatus.succeeded,
-                    ),
-                )
-            )
+            if isinstance(tool_call, ToolCall):
+                yield ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=event_type,
+                        delta=ToolCallDelta(
+                            tool_call=tool_call,
+                            parse_status=ToolCallParseStatus.succeeded,
+                        ),
+                    )
+                )
+            else:
+                # Otherwise it's an UnparseableToolCall - return the raw tool call
+                yield ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=event_type,
+                        delta=ToolCallDelta(
+                            tool_call=tool_call.model_dump_json(),
+                            parse_status=ToolCallParseStatus.failed,
+                        ),
+                    )
+                )
         else:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
@@ -234,12 +259,35 @@ async def convert_chat_completion_response_stream(
         event_type = ChatCompletionResponseEventType.progress
 
 
-def _convert_groq_tool_call(tool_call: ChatCompletionMessageToolCall) -> ToolCall:
+class UnparseableToolCall(BaseModel):
+    """
+    A ToolCall with arguments that are not valid JSON.
+    Mirrors the ToolCall schema, but with arguments as a string.
+    """
+
+    call_id: str
+    tool_name: str
+    arguments: str
+
+
+def _convert_groq_tool_call(
+    tool_call: ChatCompletionMessageToolCall,
+) -> Union[ToolCall, UnparseableToolCall]:
+    """
+    Convert a Groq tool call to a ToolCall.
+    Returns an UnparseableToolCall if the tool call is not valid JSON.
+    """
+    try:
+        arguments = json.loads(tool_call.function.arguments)
+    except Exception as e:
+        return UnparseableToolCall(
+            call_id=tool_call.id,
+            tool_name=tool_call.function.name,
+            arguments=tool_call.function.arguments,
+        )
+
     return ToolCall(
         call_id=tool_call.id,
         tool_name=tool_call.function.name,
-        # Note that Groq may return a string that is not valid JSON here
-        # So this may raise a 500 error. Going to leave this as is to see
-        # how big of an issue this is and what we can do about it.
-        arguments=json.loads(tool_call.function.arguments),
+        arguments=arguments,
    )
```
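
For illustration only, a small self-contained sketch of what the non-streaming fallback produces when tool-call arguments are not valid JSON. `UnparseableToolCall` is re-declared here so the snippet runs on its own, and the `call_123`/`get_weather` values are made up:

```python
import json

from pydantic import BaseModel


# Mirrors the UnparseableToolCall model added in the diff above.
class UnparseableToolCall(BaseModel):
    call_id: str
    tool_name: str
    arguments: str


# A tool call whose arguments Groq returned as invalid (truncated) JSON.
tool_calls = [
    UnparseableToolCall(
        call_id="call_123",
        tool_name="get_weather",
        arguments='{"location": "San Francisco"',
    )
]

# Same serialization used by the non-streaming fallback path:
# pydantic models are converted to dicts via the json.dumps default hook.
content = json.dumps(tool_calls, default=lambda x: x.model_dump())
print(content)
# [{"call_id": "call_123", "tool_name": "get_weather", "arguments": "{\"location\": \"San Francisco\""}]
```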