fix(utils.py): fix vertex ai function calling + streaming
Completes https://github.com/BerriAI/litellm/issues/3147
parent efaf4c87f2
commit 39e4927752

2 changed files with 61 additions and 52 deletions
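For context, the path this commit fixes is litellm's streaming function-calling flow against Gemini on Vertex AI. A minimal sketch of that call (not part of this commit; it assumes Vertex AI credentials are already configured for litellm, and the tool definition simply mirrors the one used in the updated test below):

import litellm

# Sketch only: streaming + function calling against Gemini on Vertex AI.
tools = [
    {
        "type": "function",
        "function": {
            "name": "submit_cities",
            "description": "Submits a list of cities",
            "parameters": {
                "type": "object",
                "properties": {"cities": {"type": "array", "items": {"type": "string"}}},
                "required": ["cities"],
            },
        },
    }
]

response = litellm.completion(
    model="vertex_ai/gemini-pro",
    messages=[
        {
            "role": "user",
            "content": "Call the submit_cities function with San Francisco and New York",
        }
    ],
    tools=tools,
    tool_choice="auto",
    stream=True,
)

for chunk in response:
    delta = chunk.choices[0].delta
    if delta.tool_calls:
        # Each streamed chunk carries an OpenAI-style delta; building this delta
        # for Gemini function calls is the code path touched by this fix.
        print(delta.tool_calls[0].function.name, delta.tool_calls[0].function.arguments)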
@@ -638,73 +638,66 @@ async def test_gemini_pro_function_calling(sync_mode):
 # gemini_pro_function_calling()


-@pytest.mark.parametrize("stream", [False, True])
 @pytest.mark.parametrize("sync_mode", [False, True])
 @pytest.mark.asyncio
-async def test_gemini_pro_function_calling_streaming(stream, sync_mode):
+async def test_gemini_pro_function_calling_streaming(sync_mode):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in fahrenheit?",
-        }
-    ]
-    optional_params = {
-        "tools": tools,
+    data = {
+        "model": "vertex_ai/gemini-pro",
+        "messages": [
+            {
+                "role": "user",
+                "content": "Call the submit_cities function with San Francisco and New York",
+            }
+        ],
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "submit_cities",
+                    "description": "Submits a list of cities",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "cities": {"type": "array", "items": {"type": "string"}}
+                        },
+                        "required": ["cities"],
+                    },
+                },
+            }
+        ],
         "tool_choice": "auto",
         "n": 1,
-        "stream": stream,
+        "stream": True,
         "temperature": 0.1,
     }
+    chunks = []
     try:
         if sync_mode == True:
-            response = litellm.completion(
-                model="gemini-pro", messages=messages, **optional_params
-            )
+            response = litellm.completion(**data)
             print(f"completion: {response}")

-            if stream == True:
-                # assert completion.choices[0].message.content is None
-                # assert len(completion.choices[0].message.tool_calls) == 1
-                for chunk in response:
-                    assert isinstance(chunk, litellm.ModelResponse)
-            else:
-                assert isinstance(response, litellm.ModelResponse)
+            for chunk in response:
+                chunks.append(chunk)
+                assert isinstance(chunk, litellm.ModelResponse)
         else:
-            response = await litellm.acompletion(
-                model="gemini-pro", messages=messages, **optional_params
-            )
+            response = await litellm.acompletion(**data)
             print(f"completion: {response}")

-            if stream == True:
-                # assert completion.choices[0].message.content is None
-                # assert len(completion.choices[0].message.tool_calls) == 1
-                async for chunk in response:
-                    print(f"chunk: {chunk}")
-                    assert isinstance(chunk, litellm.ModelResponse)
-            else:
-                assert isinstance(response, litellm.ModelResponse)
+            assert isinstance(response, litellm.CustomStreamWrapper)
+
+            async for chunk in response:
+                print(f"chunk: {chunk}")
+                chunks.append(chunk)
+                assert isinstance(chunk, litellm.ModelResponse)
+
+        complete_response = litellm.stream_chunk_builder(chunks=chunks)
+        assert (
+            complete_response.choices[0].message.content is not None
+            or len(complete_response.choices[0].message.tool_calls) > 0
+        )
+        print(f"complete_response: {complete_response}")
     except litellm.APIError as e:
         pass
     except litellm.RateLimitError as e:
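For reference, the chunks collected by the updated test can be reassembled with litellm.stream_chunk_builder, and the tool call read back out of the OpenAI-style response. A sketch (assuming `chunks` was gathered as in the test above and the model actually returned a function call):

import json

import litellm

# Assumes `chunks` is the list of streamed ModelResponse chunks collected above.
complete_response = litellm.stream_chunk_builder(chunks=chunks)
tool_call = complete_response.choices[0].message.tool_calls[0]
print(tool_call.function.name)                   # e.g. "submit_cities"
print(json.loads(tool_call.function.arguments))  # e.g. {"cities": ["San Francisco", "New York"]}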
@@ -10761,6 +10761,8 @@ class CustomStreamWrapper:
                 else:
                     completion_obj["content"] = str(chunk)
             elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
+                import proto  # type: ignore
+
                 if self.model.startswith("claude-3"):
                     response_obj = self.handle_vertexai_anthropic_chunk(chunk=chunk)
                     if response_obj is None:
@@ -10798,10 +10800,24 @@ class CustomStreamWrapper:
                     function_call = (
                         chunk.candidates[0].content.parts[0].function_call
                     )
+
                     args_dict = {}
-                    for k, v in function_call.args.items():
-                        args_dict[k] = v
-                    args_str = json.dumps(args_dict)
+
+                    # Check if it's a RepeatedComposite instance
+                    for key, val in function_call.args.items():
+                        if isinstance(
+                            val,
+                            proto.marshal.collections.repeated.RepeatedComposite,
+                        ):
+                            # If so, convert to list
+                            args_dict[key] = [v for v in val]
+                        else:
+                            args_dict[key] = val
+
+                    try:
+                        args_str = json.dumps(args_dict)
+                    except Exception as e:
+                        raise e
                     _delta_obj = litellm.utils.Delta(
                         content=None,
                         tool_calls=[
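The heart of the utils.py change: Gemini's streamed function-call arguments arrive as proto-plus containers, and repeated (array-valued) fields are not directly JSON-serializable, which is why the old json.dumps(args_dict) call could fail. The fix converts repeated fields to plain lists first. A self-contained sketch of the same idea, using a hypothetical iterable stand-in for proto.marshal.collections.repeated.RepeatedComposite:

import json


class FakeRepeatedComposite:
    """Hypothetical stand-in for proto-plus's RepeatedComposite: iterable,
    but not a type json.dumps knows how to serialize."""

    def __init__(self, items):
        self._items = list(items)

    def __iter__(self):
        return iter(self._items)


# Pretend this is function_call.args from a streamed Gemini chunk.
raw_args = {"cities": FakeRepeatedComposite(["San Francisco", "New York"])}

args_dict = {}
for key, val in raw_args.items():
    if isinstance(val, FakeRepeatedComposite):  # the real code checks RepeatedComposite
        # Same move as the fix: convert the repeated field to a plain list.
        args_dict[key] = [v for v in val]
    else:
        args_dict[key] = val

print(json.dumps(args_dict))  # {"cities": ["San Francisco", "New York"]}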