fix(utils.py): fix vertex ai function calling + streaming

Completes https://github.com/BerriAI/litellm/issues/3147
Krrish Dholakia 2024-05-13 12:32:24 -07:00
parent efaf4c87f2
commit 39e4927752
2 changed files with 61 additions and 52 deletions
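
For context, the code path this commit exercises looks roughly like the following; a minimal sketch that reuses the model name and tool schema from the updated test below (credential setup omitted), not a verbatim excerpt from the repo:

```python
import litellm

# Streaming function calling against Gemini on Vertex AI; assumes Vertex AI
# credentials are already configured in the environment.
response = litellm.completion(
    model="vertex_ai/gemini-pro",
    messages=[
        {
            "role": "user",
            "content": "Call the submit_cities function with San Francisco and New York",
        }
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "submit_cities",
                "description": "Submits a list of cities",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "cities": {"type": "array", "items": {"type": "string"}}
                    },
                    "required": ["cities"],
                },
            },
        }
    ],
    tool_choice="auto",
    stream=True,
)

for chunk in response:
    print(chunk)
```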

@@ -638,73 +638,66 @@ async def test_gemini_pro_function_calling(sync_mode):
 # gemini_pro_function_calling()
-@pytest.mark.parametrize("stream", [False, True])
 @pytest.mark.parametrize("sync_mode", [False, True])
 @pytest.mark.asyncio
-async def test_gemini_pro_function_calling_streaming(stream, sync_mode):
+async def test_gemini_pro_function_calling_streaming(sync_mode):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in fahrenheit?",
-        }
-    ]
-    optional_params = {
-        "tools": tools,
+    data = {
+        "model": "vertex_ai/gemini-pro",
+        "messages": [
+            {
+                "role": "user",
+                "content": "Call the submit_cities function with San Francisco and New York",
+            }
+        ],
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "submit_cities",
+                    "description": "Submits a list of cities",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "cities": {"type": "array", "items": {"type": "string"}}
+                        },
+                        "required": ["cities"],
+                    },
+                },
+            }
+        ],
         "tool_choice": "auto",
         "n": 1,
-        "stream": stream,
+        "stream": True,
         "temperature": 0.1,
     }
+    chunks = []
     try:
         if sync_mode == True:
-            response = litellm.completion(
-                model="gemini-pro", messages=messages, **optional_params
-            )
+            response = litellm.completion(**data)
             print(f"completion: {response}")
-            if stream == True:
-                # assert completion.choices[0].message.content is None
-                # assert len(completion.choices[0].message.tool_calls) == 1
-                for chunk in response:
-                    assert isinstance(chunk, litellm.ModelResponse)
-            else:
-                assert isinstance(response, litellm.ModelResponse)
+            for chunk in response:
+                chunks.append(chunk)
+                assert isinstance(chunk, litellm.ModelResponse)
         else:
-            response = await litellm.acompletion(
-                model="gemini-pro", messages=messages, **optional_params
-            )
+            response = await litellm.acompletion(**data)
             print(f"completion: {response}")
-            if stream == True:
-                # assert completion.choices[0].message.content is None
-                # assert len(completion.choices[0].message.tool_calls) == 1
-                async for chunk in response:
-                    print(f"chunk: {chunk}")
-                    assert isinstance(chunk, litellm.ModelResponse)
-            else:
-                assert isinstance(response, litellm.ModelResponse)
+            assert isinstance(response, litellm.CustomStreamWrapper)
+            async for chunk in response:
+                print(f"chunk: {chunk}")
+                chunks.append(chunk)
+                assert isinstance(chunk, litellm.ModelResponse)
+        complete_response = litellm.stream_chunk_builder(chunks=chunks)
+        assert (
+            complete_response.choices[0].message.content is not None
+            or len(complete_response.choices[0].message.tool_calls) > 0
+        )
+        print(f"complete_response: {complete_response}")
     except litellm.APIError as e:
         pass
     except litellm.RateLimitError as e:
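
Once streaming finishes, the chunks collected in the test can be folded back into a single response. A short sketch of reading the reconstructed tool call, assuming litellm's OpenAI-style `tool_calls` shape and that the model actually emitted a tool call (`chunks` here is the list built in the loop above):

```python
import json
import litellm

# `chunks` is the list of ModelResponse chunks collected while iterating the stream.
complete_response = litellm.stream_chunk_builder(chunks=chunks)

tool_call = complete_response.choices[0].message.tool_calls[0]
arguments = json.loads(tool_call.function.arguments)  # JSON string -> dict
print(tool_call.function.name, arguments)  # e.g. submit_cities {"cities": ["San Francisco", "New York"]}
```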

@@ -10761,6 +10761,8 @@ class CustomStreamWrapper:
                 else:
                     completion_obj["content"] = str(chunk)
             elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
+                import proto  # type: ignore
+
                 if self.model.startswith("claude-3"):
                     response_obj = self.handle_vertexai_anthropic_chunk(chunk=chunk)
                     if response_obj is None:
@@ -10798,10 +10800,24 @@
                             function_call = (
                                 chunk.candidates[0].content.parts[0].function_call
                             )
+
                             args_dict = {}
-                            for k, v in function_call.args.items():
-                                args_dict[k] = v
-                            args_str = json.dumps(args_dict)
+
+                            # Check if it's a RepeatedComposite instance
+                            for key, val in function_call.args.items():
+                                if isinstance(
+                                    val,
+                                    proto.marshal.collections.repeated.RepeatedComposite,
+                                ):
+                                    # If so, convert to list
+                                    args_dict[key] = [v for v in val]
+                                else:
+                                    args_dict[key] = val
+
+                            try:
+                                args_str = json.dumps(args_dict)
+                            except Exception as e:
+                                raise e
                             _delta_obj = litellm.utils.Delta(
                                 content=None,
                                 tool_calls=[
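
The core of the fix: Gemini returns `function_call.args` as a proto-plus map whose list-valued entries are `RepeatedComposite` objects, which `json.dumps` cannot serialize directly. Pulled out of the streaming handler, the normalization looks roughly like this; `proto_args_to_json` is an illustrative helper name, not something defined in litellm:

```python
import json

import proto  # type: ignore  # proto-plus, installed alongside the Vertex AI SDK


def proto_args_to_json(args) -> str:
    """Serialize a Gemini function_call.args mapping to a JSON string."""
    args_dict = {}
    for key, val in args.items():
        if isinstance(val, proto.marshal.collections.repeated.RepeatedComposite):
            # RepeatedComposite is not JSON-serializable; unpack it into a plain list
            args_dict[key] = [v for v in val]
        else:
            args_dict[key] = val
    return json.dumps(args_dict)
```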