mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 18:50:44 +00:00
PR tool call followups
This commit is contained in:
parent
1f60c0286d
commit
76e08cfde0
3 changed files with 126 additions and 30 deletions
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import warnings
|
import warnings
|
||||||
from typing import AsyncGenerator, Literal
|
from typing import AsyncGenerator, Literal, Union
|
||||||
|
|
||||||
from groq import Stream
|
from groq import Stream
|
||||||
from groq.types.chat.chat_completion import ChatCompletion
|
from groq.types.chat.chat_completion import ChatCompletion
|
||||||
|
@ -30,6 +30,8 @@ from groq.types.shared.function_definition import FunctionDefinition
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import ToolParamDefinition
|
from llama_models.llama3.api.datatypes import ToolParamDefinition
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import (
|
from llama_stack.apis.common.content_types import (
|
||||||
TextDelta,
|
TextDelta,
|
||||||
ToolCallDelta,
|
ToolCallDelta,
|
||||||
|
@ -150,15 +152,26 @@ def convert_chat_completion_response(
|
||||||
_convert_groq_tool_call(tool_call)
|
_convert_groq_tool_call(tool_call)
|
||||||
for tool_call in choice.message.tool_calls
|
for tool_call in choice.message.tool_calls
|
||||||
]
|
]
|
||||||
return ChatCompletionResponse(
|
if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
|
||||||
completion_message=CompletionMessage(
|
# If we couldn't parse a tool call, jsonify the tool calls and return them
|
||||||
tool_calls=tool_calls,
|
return ChatCompletionResponse(
|
||||||
stop_reason=StopReason.end_of_message,
|
completion_message=CompletionMessage(
|
||||||
# Content is not optional
|
stop_reason=StopReason.end_of_message,
|
||||||
content="",
|
content=json.dumps(tool_calls, default=lambda x: x.model_dump()),
|
||||||
),
|
),
|
||||||
logprobs=None,
|
logprobs=None,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
# Otherwise, return tool calls as normal
|
||||||
|
return ChatCompletionResponse(
|
||||||
|
completion_message=CompletionMessage(
|
||||||
|
tool_calls=tool_calls,
|
||||||
|
stop_reason=StopReason.end_of_message,
|
||||||
|
# Content is not optional
|
||||||
|
content="",
|
||||||
|
),
|
||||||
|
logprobs=None,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return ChatCompletionResponse(
|
return ChatCompletionResponse(
|
||||||
completion_message=CompletionMessage(
|
completion_message=CompletionMessage(
|
||||||
|
@ -214,15 +227,27 @@ async def convert_chat_completion_response_stream(
|
||||||
|
|
||||||
# We assume Groq produces fully formed tool calls for each chunk
|
# We assume Groq produces fully formed tool calls for each chunk
|
||||||
tool_call = _convert_groq_tool_call(choice.delta.tool_calls[0])
|
tool_call = _convert_groq_tool_call(choice.delta.tool_calls[0])
|
||||||
yield ChatCompletionResponseStreamChunk(
|
if isinstance(tool_call, ToolCall):
|
||||||
event=ChatCompletionResponseEvent(
|
yield ChatCompletionResponseStreamChunk(
|
||||||
event_type=event_type,
|
event=ChatCompletionResponseEvent(
|
||||||
delta=ToolCallDelta(
|
event_type=event_type,
|
||||||
content=tool_call,
|
delta=ToolCallDelta(
|
||||||
parse_status=ToolCallParseStatus.succeeded,
|
content=tool_call,
|
||||||
),
|
parse_status=ToolCallParseStatus.succeeded,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Otherwise it's an UnparseableToolCall - return the raw tool call
|
||||||
|
yield ChatCompletionResponseStreamChunk(
|
||||||
|
event=ChatCompletionResponseEvent(
|
||||||
|
event_type=event_type,
|
||||||
|
delta=ToolCallDelta(
|
||||||
|
content=tool_call.model_dump_json(),
|
||||||
|
parse_status=ToolCallParseStatus.failed,
|
||||||
|
),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
yield ChatCompletionResponseStreamChunk(
|
yield ChatCompletionResponseStreamChunk(
|
||||||
event=ChatCompletionResponseEvent(
|
event=ChatCompletionResponseEvent(
|
||||||
|
@ -234,12 +259,35 @@ async def convert_chat_completion_response_stream(
|
||||||
event_type = ChatCompletionResponseEventType.progress
|
event_type = ChatCompletionResponseEventType.progress
|
||||||
|
|
||||||
|
|
||||||
def _convert_groq_tool_call(tool_call: ChatCompletionMessageToolCall) -> ToolCall:
|
class UnparseableToolCall(BaseModel):
|
||||||
|
"""
|
||||||
|
A ToolCall with arguments that are not valid JSON.
|
||||||
|
Mirrors the ToolCall schema, but with arguments as a string.
|
||||||
|
"""
|
||||||
|
|
||||||
|
call_id: str
|
||||||
|
tool_name: str
|
||||||
|
arguments: str
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_groq_tool_call(
|
||||||
|
tool_call: ChatCompletionMessageToolCall,
|
||||||
|
) -> Union[ToolCall, UnparseableToolCall]:
|
||||||
|
"""
|
||||||
|
Convert a Groq tool call to a ToolCall.
|
||||||
|
Returns an UnparseableToolCall if the tool call is not valid JSON.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
arguments = json.loads(tool_call.function.arguments)
|
||||||
|
except Exception as e:
|
||||||
|
return UnparseableToolCall(
|
||||||
|
call_id=tool_call.id,
|
||||||
|
tool_name=tool_call.function.name,
|
||||||
|
arguments=tool_call.function.arguments,
|
||||||
|
)
|
||||||
|
|
||||||
return ToolCall(
|
return ToolCall(
|
||||||
call_id=tool_call.id,
|
call_id=tool_call.id,
|
||||||
tool_name=tool_call.function.name,
|
tool_name=tool_call.function.name,
|
||||||
# Note that Groq may return a string that is not valid JSON here
|
arguments=arguments,
|
||||||
# So this may raise a 500 error. Going to leave this as is to see
|
|
||||||
# how big of an issue this is and what we can do about it.
|
|
||||||
arguments=json.loads(tool_call.function.arguments),
|
|
||||||
)
|
)
|
||||||
|
|
|
@ -23,6 +23,7 @@ from groq.types.chat.chat_completion_message_tool_call import (
|
||||||
from groq.types.shared.function_definition import FunctionDefinition
|
from groq.types.shared.function_definition import FunctionDefinition
|
||||||
from llama_models.datatypes import GreedySamplingStrategy, TopPSamplingStrategy
|
from llama_models.datatypes import GreedySamplingStrategy, TopPSamplingStrategy
|
||||||
from llama_models.llama3.api.datatypes import ToolParamDefinition
|
from llama_models.llama3.api.datatypes import ToolParamDefinition
|
||||||
|
from llama_stack.apis.common.content_types import ToolCallParseStatus
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponseEventType,
|
ChatCompletionResponseEventType,
|
||||||
|
@ -347,6 +348,26 @@ class TestConvertNonStreamChatCompletionResponse:
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def test_converts_unparseable_tool_calls(self):
|
||||||
|
response = self._dummy_chat_completion_response_with_tool_call()
|
||||||
|
response.choices[0].message.tool_calls = [
|
||||||
|
ChatCompletionMessageToolCall(
|
||||||
|
id="tool_call_id",
|
||||||
|
type="function",
|
||||||
|
function=Function(
|
||||||
|
name="log",
|
||||||
|
arguments="(number=10, base=2)",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
converted = convert_chat_completion_response(response)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
converted.completion_message.content
|
||||||
|
== '[{"call_id": "tool_call_id", "tool_name": "log", "arguments": "(number=10, base=2)"}]'
|
||||||
|
)
|
||||||
|
|
||||||
def _dummy_chat_completion_response(self):
|
def _dummy_chat_completion_response(self):
|
||||||
return ChatCompletion(
|
return ChatCompletion(
|
||||||
id="chatcmpl-123",
|
id="chatcmpl-123",
|
||||||
|
@ -478,6 +499,40 @@ class TestConvertStreamChatCompletionResponse:
|
||||||
arguments={"origin": "AU", "destination": "LAX"},
|
arguments={"origin": "AU", "destination": "LAX"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_returns_tool_calls_stream_with_unparseable_tool_calls(self):
|
||||||
|
def tool_call_stream():
|
||||||
|
chunk = self._dummy_chat_completion_chunk_with_tool_call()
|
||||||
|
chunk.choices[0].delta.tool_calls = [
|
||||||
|
ChoiceDeltaToolCall(
|
||||||
|
index=0,
|
||||||
|
type="function",
|
||||||
|
id="tool_call_id",
|
||||||
|
function=ChoiceDeltaToolCallFunction(
|
||||||
|
name="get_flight_info",
|
||||||
|
arguments="(origin=AU, destination=LAX)",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
chunk = self._dummy_chat_completion_chunk_with_tool_call()
|
||||||
|
chunk.choices[0].delta.content = None
|
||||||
|
chunk.choices[0].finish_reason = "stop"
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
stream = tool_call_stream()
|
||||||
|
converted = convert_chat_completion_response_stream(stream)
|
||||||
|
|
||||||
|
iter = converted.__aiter__()
|
||||||
|
chunk = await iter.__anext__()
|
||||||
|
assert chunk.event.event_type == ChatCompletionResponseEventType.start
|
||||||
|
assert (
|
||||||
|
chunk.event.delta.content
|
||||||
|
== '{"call_id":"tool_call_id","tool_name":"get_flight_info","arguments":"(origin=AU, destination=LAX)"}'
|
||||||
|
)
|
||||||
|
assert chunk.event.delta.parse_status == ToolCallParseStatus.failed
|
||||||
|
|
||||||
def _dummy_chat_completion_chunk(self):
|
def _dummy_chat_completion_chunk(self):
|
||||||
return ChatCompletionChunk(
|
return ChatCompletionChunk(
|
||||||
id="chatcmpl-123",
|
id="chatcmpl-123",
|
||||||
|
|
|
@ -377,13 +377,6 @@ class TestInference:
|
||||||
sample_tool_definition,
|
sample_tool_definition,
|
||||||
):
|
):
|
||||||
inference_impl, _ = inference_stack
|
inference_impl, _ = inference_stack
|
||||||
provider = inference_impl.routing_table.get_provider_impl(inference_model)
|
|
||||||
if (
|
|
||||||
provider.__provider_spec__.provider_type == "remote::groq"
|
|
||||||
and "Llama-3.2" in inference_model
|
|
||||||
):
|
|
||||||
# TODO(aidand): Remove this skip once Groq's tool calling for Llama3.2 works better
|
|
||||||
pytest.skip("Groq's tool calling for Llama3.2 doesn't work very well")
|
|
||||||
|
|
||||||
messages = sample_messages + [
|
messages = sample_messages + [
|
||||||
UserMessage(
|
UserMessage(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue