Add property ordering for Vertex AI schema (#9828) + Fix combining multiple tool calls (#10040)

* fix #9783: Retain schema field ordering for Google Gemini and Vertex AI (#9828) (see the schema-ordering sketch after this list)

* test: update test

* refactor(groq.py): initial commit migrating groq to base_llm_http_handler

* fix(streaming_chunk_builder_utils.py): fix how tool content is combined when a stream contains multiple tool calls (see the chunk-combining sketch at the end of the diff)

Fixes https://github.com/BerriAI/litellm/issues/10034

* fix(vertex_ai/common_utils.py): prevent infinite loop in helper function

* fix(groq/chat/transformation.py): handle groq streaming errors correctly

* fix(groq/chat/transformation.py): handle max_retries
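
Gemini and Vertex AI response schemas accept a propertyOrdering list that pins the order in which object keys are generated, which is what the #9828 fix relies on. The sketch below is a minimal illustration of the idea, not the actual helper in vertex_ai/common_utils.py; the function name and the depth guard (which echoes the infinite-loop fix above) are illustrative:

    MAX_DEPTH = 10  # illustrative guard against self-referential schemas recursing forever

    def add_property_ordering(schema: dict, depth: int = 0) -> dict:
        """Record the insertion order of `properties` keys in `propertyOrdering`,
        recursing into nested object and array schemas (illustrative sketch)."""
        if depth > MAX_DEPTH:
            return schema
        properties = schema.get("properties")
        if isinstance(properties, dict):
            # Python dicts preserve insertion order, so this keeps the order
            # in which the caller declared the fields.
            schema.setdefault("propertyOrdering", list(properties.keys()))
            for sub_schema in properties.values():
                if isinstance(sub_schema, dict):
                    add_property_ordering(sub_schema, depth + 1)
        items = schema.get("items")
        if isinstance(items, dict):
            add_property_ordering(items, depth + 1)
        return schema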

---------

Co-authored-by: Adrian Lyjak <adrian@chatmeter.com>
Krish Dholakia, 2025-04-15 22:29:25 -07:00 (committed by GitHub)
parent 1b9b745cae
commit fdfa1108a6
12 changed files with 493 additions and 201 deletions

litellm/main.py

@@ -1622,24 +1622,22 @@ def completion( # type: ignore # noqa: PLR0915
             ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
                 optional_params[k] = v
-        response = groq_chat_completions.completion(
+        response = base_llm_http_handler.completion(
             model=model,
+            stream=stream,
             messages=messages,
-            headers=headers,
-            model_response=model_response,
-            print_verbose=print_verbose,
-            api_key=api_key,
-            api_base=api_base,
             acompletion=acompletion,
-            logging_obj=logging,
+            api_base=api_base,
+            model_response=model_response,
             optional_params=optional_params,
             litellm_params=litellm_params,
-            logger_fn=logger_fn,
-            timeout=timeout,  # type: ignore
-            custom_prompt_dict=custom_prompt_dict,
-            client=client,  # pass AsyncOpenAI, OpenAI client
             custom_llm_provider=custom_llm_provider,
+            timeout=timeout,
+            headers=headers,
+            encoding=encoding,
+            api_key=api_key,
+            logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
+            client=client,
         )
     elif custom_llm_provider == "aiohttp_openai":
         # NEW aiohttp provider for 10-100x higher RPS
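
For callers nothing changes here: groq requests that previously went through the dedicated groq_chat_completions handler are now served by the shared base_llm_http_handler, with streaming errors and max_retries handled in the groq transformation (per the commit message above). A usage sketch, with an illustrative model name:

    import litellm

    # Streamed groq completion; under the hood this now goes through
    # base_llm_http_handler like other HTTP-based providers.
    response = litellm.completion(
        model="groq/llama-3.1-8b-instant",  # illustrative model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        max_retries=2,
    )
    for chunk in response:
        print(chunk.choices[0].delta.content or "", end="")
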
@@ -2658,9 +2656,9 @@ def completion( # type: ignore # noqa: PLR0915
                 "aws_region_name" not in optional_params
                 or optional_params["aws_region_name"] is None
             ):
-                optional_params["aws_region_name"] = (
-                    aws_bedrock_client.meta.region_name
-                )
+                optional_params[
+                    "aws_region_name"
+                ] = aws_bedrock_client.meta.region_name
 
         bedrock_route = BedrockModelInfo.get_bedrock_route(model)
         if bedrock_route == "converse":
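
The hunk above is a formatting-only change; the behavior it preserves is falling back to the region configured on a caller-supplied boto3 Bedrock client. A standalone sketch of that fallback, assuming boto3 is installed:

    import boto3

    # boto3 exposes the resolved region on client.meta.region_name
    aws_bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

    optional_params: dict = {}
    if (
        "aws_region_name" not in optional_params
        or optional_params["aws_region_name"] is None
    ):
        optional_params["aws_region_name"] = aws_bedrock_client.meta.region_name

    print(optional_params["aws_region_name"])  # us-east-1
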
@@ -4367,9 +4365,9 @@ def adapter_completion(
     new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
     response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
-    translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
-        None
-    )
+    translated_response: Optional[
+        Union[BaseModel, AdapterCompletionStreamWrapper]
+    ] = None
     if isinstance(response, ModelResponse):
         translated_response = translation_obj.translate_completion_output_params(
             response=response
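
This hunk is also formatting-only; adapter_completion keeps its translate-in / call / translate-out flow: the adapter maps the caller's format onto completion() kwargs, then maps the resulting ModelResponse back. A sketch with a hypothetical adapter (the class and field names are illustrative; only the two translate methods come from the diff above):

    from typing import Any

    class EchoAdapter:
        """Hypothetical adapter mirroring the flow in adapter_completion()."""

        def translate_completion_input_params(self, kwargs: dict) -> dict:
            # Map the caller's request format onto litellm completion() kwargs.
            return {"model": kwargs["model"], "messages": kwargs["messages"]}

        def translate_completion_output_params(self, response: Any) -> dict:
            # Map litellm's ModelResponse back onto the caller's format.
            return {"text": response.choices[0].message.content}
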
@@ -5789,9 +5787,9 @@ def stream_chunk_builder( # noqa: PLR0915
         ]
         if len(content_chunks) > 0:
-            response["choices"][0]["message"]["content"] = (
-                processor.get_combined_content(content_chunks)
-            )
+            response["choices"][0]["message"][
+                "content"
+            ] = processor.get_combined_content(content_chunks)
 
         reasoning_chunks = [
             chunk
@@ -5802,9 +5800,9 @@ def stream_chunk_builder( # noqa: PLR0915
         ]
         if len(reasoning_chunks) > 0:
-            response["choices"][0]["message"]["reasoning_content"] = (
-                processor.get_combined_reasoning_content(reasoning_chunks)
-            )
+            response["choices"][0]["message"][
+                "reasoning_content"
+            ] = processor.get_combined_reasoning_content(reasoning_chunks)
 
         audio_chunks = [
             chunk
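
The two hunks above only reformat stream_chunk_builder; the actual #10034 fix lives in streaming_chunk_builder_utils.py. The key idea is that when a stream carries several tool calls, argument fragments must be grouped by each delta's index and concatenated per call; merging them globally splices the JSON arguments of different calls together. A minimal sketch of that grouping, assuming OpenAI-style streaming deltas (the helper is illustrative, not litellm's implementation):

    from collections import defaultdict

    def combine_tool_call_chunks(chunks: list) -> list:
        """Group streamed tool-call deltas by `index` and concatenate the
        argument fragments per call (illustrative, not litellm's code)."""
        combined = defaultdict(lambda: {"id": None, "name": "", "arguments": ""})
        for chunk in chunks:
            for delta in chunk["choices"][0]["delta"].get("tool_calls") or []:
                call = combined[delta["index"]]  # index keys each distinct call
                if delta.get("id"):
                    call["id"] = delta["id"]
                fn = delta.get("function") or {}
                if fn.get("name"):
                    call["name"] = fn["name"]
                call["arguments"] += fn.get("arguments") or ""
        return [combined[i] for i in sorted(combined)]

    # Two interleaved tool calls: grouping by index keeps each call's JSON
    # argument string intact instead of splicing them together.
    chunks = [
        {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_a",
            "function": {"name": "get_weather", "arguments": '{"city": '}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 1, "id": "call_b",
            "function": {"name": "get_time", "arguments": '{"tz": '}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 0,
            "function": {"arguments": '"Boston"}'}}]}}]},
        {"choices": [{"delta": {"tool_calls": [{"index": 1,
            "function": {"arguments": '"UTC"}'}}]}}]},
    ]
    print(combine_tool_call_chunks(chunks))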