Add property ordering for Vertex AI schema (#9828) + Fix combining multiple tool calls (#10040)

* fix #9783: Retain schema field ordering for Google Gemini and Vertex AI (#9828) (see the schema-ordering sketch after this list)

* test: update test

* refactor(groq.py): initial commit migrating groq to base_llm_http_handler

* fix(streaming_chunk_builder_utils.py): fix how tool content is combined when a stream contains multiple tool calls (see the chunk-combining sketch at the end of the diff)

Fixes https://github.com/BerriAI/litellm/issues/10034

* fix(vertex_ai/common_utils.py): prevent infinite loop in helper function

* fix(groq/chat/transformation.py): handle groq streaming errors correctly

* fix(groq/chat/transformation.py): handle max_retries
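
Gemini and Vertex AI response schemas accept a propertyOrdering list that pins the order in which object keys are generated, which is what the #9828 fix relies on. The sketch below is a minimal illustration of the idea, not the actual helper in vertex_ai/common_utils.py; the function name and the depth guard (which echoes the infinite-loop fix above) are illustrative:

    MAX_DEPTH = 10  # illustrative guard against self-referential schemas recursing forever

    def add_property_ordering(schema: dict, depth: int = 0) -> dict:
        """Record the insertion order of `properties` keys in `propertyOrdering`,
        recursing into nested object and array schemas (illustrative sketch)."""
        if depth > MAX_DEPTH:
            return schema
        properties = schema.get("properties")
        if isinstance(properties, dict):
            # Python dicts preserve insertion order, so this keeps the order
            # in which the caller declared the fields.
            schema.setdefault("propertyOrdering", list(properties.keys()))
            for sub_schema in properties.values():
                if isinstance(sub_schema, dict):
                    add_property_ordering(sub_schema, depth + 1)
        items = schema.get("items")
        if isinstance(items, dict):
            add_property_ordering(items, depth + 1)
        return schema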

---------

Co-authored-by: Adrian Lyjak <adrian@chatmeter.com>
Krish Dholakia, 2025-04-15 22:29:25 -07:00 (committed by GitHub)
parent 1b9b745cae
commit fdfa1108a6
12 changed files with 493 additions and 201 deletions

litellm/main.py

@@ -1622,24 +1622,22 @@ def completion( # type: ignore # noqa: PLR0915
             ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
                 optional_params[k] = v
-        response = groq_chat_completions.completion(
+        response = base_llm_http_handler.completion(
             model=model,
+            stream=stream,
             messages=messages,
-            headers=headers,
-            model_response=model_response,
-            print_verbose=print_verbose,
-            api_key=api_key,
-            api_base=api_base,
             acompletion=acompletion,
-            logging_obj=logging,
+            api_base=api_base,
+            model_response=model_response,
             optional_params=optional_params,
             litellm_params=litellm_params,
-            logger_fn=logger_fn,
-            timeout=timeout,  # type: ignore
-            custom_prompt_dict=custom_prompt_dict,
-            client=client,  # pass AsyncOpenAI, OpenAI client
             custom_llm_provider=custom_llm_provider,
+            timeout=timeout,
+            headers=headers,
+            encoding=encoding,
+            api_key=api_key,
+            logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
+            client=client,
         )
     elif custom_llm_provider == "aiohttp_openai":
         # NEW aiohttp provider for 10-100x higher RPS
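
For callers nothing changes here: groq requests that previously went through the dedicated groq_chat_completions handler are now served by the shared base_llm_http_handler, with streaming errors and max_retries handled in the groq transformation (per the commit message above). A usage sketch, with an illustrative model name:

    import litellm

    # Streamed groq completion; under the hood this now goes through
    # base_llm_http_handler like other HTTP-based providers.
    response = litellm.completion(
        model="groq/llama-3.1-8b-instant",  # illustrative model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        max_retries=2,
    )
    for chunk in response:
        print(chunk.choices[0].delta.content or "", end="")
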
@@ -2658,9 +2656,9 @@ def completion( # type: ignore # noqa: PLR0915
                 "aws_region_name" not in optional_params
                 or optional_params["aws_region_name"] is None
             ):
-                optional_params["aws_region_name"] = (
-                    aws_bedrock_client.meta.region_name
-                )
+                optional_params[
+                    "aws_region_name"
+                ] = aws_bedrock_client.meta.region_name
 
         bedrock_route = BedrockModelInfo.get_bedrock_route(model)
         if bedrock_route == "converse":
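
The hunk above is a formatting-only change; the behavior it preserves is falling back to the region configured on a caller-supplied boto3 Bedrock client. A standalone sketch of that fallback, assuming boto3 is installed:

    import boto3

    # boto3 exposes the resolved region on client.meta.region_name
    aws_bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

    optional_params: dict = {}
    if (
        "aws_region_name" not in optional_params
        or optional_params["aws_region_name"] is None
    ):
        optional_params["aws_region_name"] = aws_bedrock_client.meta.region_name

    print(optional_params["aws_region_name"])  # us-east-1
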
@@ -4367,9 +4365,9 @@ def adapter_completion(
     new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
     response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
-    translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
-        None
-    )
+    translated_response: Optional[
+        Union[BaseModel, AdapterCompletionStreamWrapper]
+    ] = None
     if isinstance(response, ModelResponse):
         translated_response = translation_obj.translate_completion_output_params(
             response=response
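
This hunk is also formatting-only; adapter_completion keeps its translate-in / call / translate-out flow: the adapter maps the caller's format onto completion() kwargs, then maps the resulting ModelResponse back. A sketch with a hypothetical adapter (the class and field names are illustrative; only the two translate methods come from the diff above):

    from typing import Any

    class EchoAdapter:
        """Hypothetical adapter mirroring the flow in adapter_completion()."""

        def translate_completion_input_params(self, kwargs: dict) -> dict:
            # Map the caller's request format onto litellm completion() kwargs.
            return {"model": kwargs["model"], "messages": kwargs["messages"]}

        def translate_completion_output_params(self, response: Any) -> dict:
            # Map litellm's ModelResponse back onto the caller's format.
            return {"text": response.choices[0].message.content}
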
@@ -5789,9 +5787,9 @@ def stream_chunk_builder( # noqa: PLR0915
         ]
         if len(content_chunks) > 0:
-            response["choices"][0]["message"]["content"] = (
-                processor.get_combined_content(content_chunks)
-            )
+            response["choices"][0]["message"][
+                "content"
+            ] = processor.get_combined_content(content_chunks)
 
         reasoning_chunks = [
             chunk
@@ -5802,9 +5800,9 @@ def stream_chunk_builder( # noqa: PLR0915
         ]
         if len(reasoning_chunks) > 0:
-            response["choices"][0]["message"]["reasoning_content"] = (
-                processor.get_combined_reasoning_content(reasoning_chunks)
-            )
+            response["choices"][0]["message"][
+                "reasoning_content"
+            ] = processor.get_combined_reasoning_content(reasoning_chunks)
 
         audio_chunks = [
             chunk
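
The two hunks above only reformat stream_chunk_builder; the actual #10034 fix lives in streaming_chunk_builder_utils.py. The key idea is that when a stream carries several tool calls, argument fragments must be grouped by each delta's index and concatenated per call; merging them globally splices the JSON arguments of different calls together. A minimal sketch of that grouping, assuming OpenAI-style streaming deltas (the helper is illustrative, not litellm's implementation):

    from collections import defaultdict

    def combine_tool_call_chunks(chunks: list) -> list:
        """Group streamed tool-call deltas by `index` and concatenate the
        argument fragments per call (illustrative, not litellm's code)."""
        combined = defaultdict(lambda: {"id": None, "name": "", "arguments": ""})
        for chunk in chunks:
            for delta in chunk["choices"][0]["delta"].get("tool_calls") or []:
                call = combined[delta["index"]]  # index keys each distinct call
                if delta.get("id"):
                    call["id"] = delta["id"]
                fn = delta.get("function") or {}
                if fn.get("name"):
                    call["name"] = fn["name"]
                call["arguments"] += fn.get("arguments") or ""
        return [combined[i] for i in sorted(combined)]

    # Two interleaved tool calls: grouping by index keeps each call's JSON
    # argument string intact instead of splicing them together.
    chunks = [
        {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_a",
            "function": {"name": "get_weather", "arguments": '{"city": '}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 1, "id": "call_b",
            "function": {"name": "get_time", "arguments": '{"tz": '}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 0,
            "function": {"arguments": '"Boston"}'}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 1,
            "function": {"arguments": '"UTC"}'}}]}}]},
    ]
    print(combine_tool_call_chunks(chunks))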