fix: litellm tool call parsing event type to progress (#1312)

# What does this PR do?

- Test with script:
https://gist.github.com/yanxi0830/64699f3604766ac2319421b750c5bf9c

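- Agent tool calls are not parsed correctly with the LiteLLM provider
because we skip processing events of type
`ChatCompletionResponseEventType.complete`.
- However, LiteLLM emits its `ToolCallDelta` with `event_type="complete"`,
so the tool call is skipped. This PR emits the `ToolCallDelta` with
`ChatCompletionResponseEventType.progress` instead.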


See `llama_stack/providers/inline/agents/meta_reference/agent_instance.py` (lines 570-577) at commit 2f7683bc5f.


- Llama model (the `ToolCallDelta` arrives with `event_type='progress'`)
```
ChatCompletionResponseStreamChunk(
│   event=Event(
│   │   delta=ToolCallDelta(
│   │   │   parse_status='succeeded',
│   │   │   tool_call=ToolCall(
│   │   │   │   arguments={'kind': 'pod', 'namespace': 'openshift-lightspeed'},
│   │   │   │   call_id='call_tIjWTUdsQXhQ2XHC5ke4EQY5',
│   │   │   │   tool_name='get_object_namespace_list'
│   │   │   ),
│   │   │   type='tool_call'
│   │   ),
│   │   event_type='progress',
│   │   logprobs=None,
│   │   stop_reason='end_of_turn'
│   ),
│   metrics=None
)
ChatCompletionResponseStreamChunk(
│   event=Event(
│   │   delta=TextDelta(text='', type='text'),
│   │   event_type='complete',
│   │   logprobs=None,
│   │   stop_reason='end_of_turn'
│   ),
│   metrics=None
)
```

- LiteLLM model (the `ToolCallDelta` arrives with `event_type='complete'`)
```
ChatCompletionResponseStreamChunk(
│   event=Event(
│   │   delta=ToolCallDelta(
│   │   │   parse_status='succeeded',
│   │   │   tool_call=ToolCall(
│   │   │   │   arguments={'kind': 'pod', 'namespace': 'openshift-lightspeed'},
│   │   │   │   call_id='call_tIjWTUdsQXhQ2XHC5ke4EQY5',
│   │   │   │   tool_name='get_object_namespace_list'
│   │   │   ),
│   │   │   type='tool_call'
│   │   ),
│   │   event_type='complete',
│   │   logprobs=None,
│   │   stop_reason='end_of_turn'
│   ),
│   metrics=None
)
ChatCompletionResponseStreamChunk(
│   event=Event(
│   │   delta=TextDelta(text='', type='text'),
│   │   event_type='complete',
│   │   logprobs=None,
│   │   stop_reason='end_of_turn'
│   ),
│   metrics=None
)
```


[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

- Test with script:
https://gist.github.com/yanxi0830/64699f3604766ac2319421b750c5bf9c


[//]: # (## Documentation)
This commit is contained in:
Xi Yan 2025-02-27 18:00:27 -08:00 committed by GitHub
parent 2f7683bc5f
commit 076d2f349d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -27,7 +27,9 @@ from openai.types.chat import (
from openai.types.chat import ( from openai.types.chat import (
ChatCompletionMessageParam as OpenAIChatCompletionMessage, ChatCompletionMessageParam as OpenAIChatCompletionMessage,
) )
from openai.types.chat import ChatCompletionMessageToolCall from openai.types.chat import (
ChatCompletionMessageToolCall,
)
from openai.types.chat import ( from openai.types.chat import (
ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall, ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall,
) )
@ -199,7 +201,9 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Unio
return None return None
def process_completion_response(response: OpenAICompatCompletionResponse) -> CompletionResponse: def process_completion_response(
response: OpenAICompatCompletionResponse,
) -> CompletionResponse:
choice = response.choices[0] choice = response.choices[0]
# drop suffix <eot_id> if present and return stop reason as end of turn # drop suffix <eot_id> if present and return stop reason as end of turn
if choice.text.endswith("<|eot_id|>"): if choice.text.endswith("<|eot_id|>"):
@ -492,7 +496,9 @@ class UnparseableToolCall(BaseModel):
arguments: str = "" arguments: str = ""
async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIChatCompletionMessage: async def convert_message_to_openai_dict_new(
message: Message | Dict,
) -> OpenAIChatCompletionMessage:
""" """
Convert a Message to an OpenAI API-compatible dictionary. Convert a Message to an OpenAI API-compatible dictionary.
""" """
@ -942,7 +948,7 @@ async def convert_openai_chat_completion_stream(
) )
yield ChatCompletionResponseStreamChunk( yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent( event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.complete, event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta( delta=ToolCallDelta(
tool_call=tool_call, tool_call=tool_call,
parse_status=ToolCallParseStatus.succeeded, parse_status=ToolCallParseStatus.succeeded,