Merge pull request #9166 from BerriAI/litellm_dev_03_11_2025_p2

Litellm dev 03 11 2025 p2
Krish Dholakia 2025-03-11 22:51:20 -07:00 committed by GitHub
commit a7e0e7283e
7 changed files with 27 additions and 24 deletions

@@ -573,9 +573,7 @@ def completion_cost(  # noqa: PLR0915
             base_model=base_model,
         )
-        verbose_logger.debug(
-            f"completion_response _select_model_name_for_cost_calc: {model}"
-        )
+        verbose_logger.info(f"selected model name for cost calculation: {model}")
         if completion_response is not None and (
             isinstance(completion_response, BaseModel)

@@ -7,7 +7,11 @@ from pydantic import BaseModel
 import litellm
 from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
 from litellm.types.utils import FileTypes
-from litellm.utils import TranscriptionResponse, convert_to_model_response_object
+from litellm.utils import (
+    TranscriptionResponse,
+    convert_to_model_response_object,
+    extract_duration_from_srt_or_vtt,
+)
 
 from .azure import (
     AzureChatCompletion,
@@ -156,6 +160,8 @@ class AzureAudioTranscription(AzureChatCompletion):
             stringified_response = response.model_dump()
         else:
             stringified_response = TranscriptionResponse(text=response).model_dump()
+            duration = extract_duration_from_srt_or_vtt(response)
+            stringified_response["duration"] = duration
 
         ## LOGGING
         logging_obj.post_call(
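The helper imported above has to derive a duration because SRT/VTT transcription responses are plain text with cue timestamps rather than a structured duration field. A minimal sketch of that idea follows; the function name and regex are assumptions for illustration, not litellm's actual implementation of extract_duration_from_srt_or_vtt.

import re
from typing import Optional


def duration_from_srt_or_vtt(content: str) -> Optional[float]:
    # Hypothetical sketch: SRT cues look like "00:01:02,500", VTT cues like
    # "00:01:02.500"; the clip duration is approximated by the latest
    # timestamp that appears in the transcript.
    stamps = re.findall(r"(\d{2}):(\d{2}):(\d{2})[.,](\d{3})", content)
    if not stamps:
        return None
    return max(
        int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000.0
        for h, m, s, ms in stamps
    )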

@@ -1231,7 +1231,9 @@ class AWSEventStreamDecoder:
         if len(self.content_blocks) == 0:
             return False
 
-        if "text" in self.content_blocks[0]:
+        if (
+            "toolUse" not in self.content_blocks[0]
+        ):  # be explicit - only do this if tool use block, as this is to prevent json decoding errors
             return False
 
         for block in self.content_blocks:
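The comment above refers to downstream JSON decoding of tool-call arguments; a text-only block carries nothing JSON-decodable, so the early return skips it. A hypothetical illustration of the two block shapes (field names assumed from the Bedrock converse stream format):

import json

text_block = {"text": "The weather in Berlin is mild."}
tool_block = {"toolUse": {"name": "get_weather", "input": '{"location": "Berl'}}

# A partially streamed (or plain-text) argument string is not valid JSON yet,
# which is what the guard above protects against.
try:
    json.loads(tool_block["toolUse"]["input"])
except json.JSONDecodeError:
    print("arguments not yet valid JSON - keep buffering")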

File diff suppressed because one or more lines are too long

@@ -1,17 +1,9 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: whisper
     litellm_params:
-      model: gpt-3.5-turbo
-  - model_name: gpt-4o
-    litellm_params:
-      model: azure/gpt-4o
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
-  - model_name: fake-openai-endpoint-5
-    litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      timeout: 1
-litellm_settings:
-  fallbacks: [{"gpt-3.5-turbo": ["gpt-4o"]}]
+      model: azure/azure-whisper
+      api_version: 2024-02-15-preview
+      api_base: os.environ/AZURE_EUROPE_API_BASE
+      api_key: os.environ/AZURE_EUROPE_API_KEY
+    model_info:
+      mode: audio_transcription
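A minimal sketch of calling the new whisper entry through the proxy with the OpenAI SDK; the base_url, api_key, and audio file name are placeholders for a local proxy deployment.

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

with open("sample.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper",  # matches model_name in the config above
        file=audio_file,
    )
print(transcript.text)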

@@ -947,7 +947,9 @@ def _set_spend_logs_payload(
     spend_logs_url: Optional[str] = None,
 ):
     verbose_proxy_logger.info(
-        "Writing spend log to db - request_id: {}".format(payload.get("request_id"))
+        "Writing spend log to db - request_id: {}, spend: {}".format(
+            payload.get("request_id"), payload.get("spend")
+        )
     )
     if prisma_client is not None and spend_logs_url is not None:
         if isinstance(payload["startTime"], datetime):

@@ -992,8 +992,8 @@ def test_anthropic_thinking_output(model):
 @pytest.mark.parametrize(
     "model",
     [
-        "anthropic/claude-3-7-sonnet-20250219",
-        # "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+        # "anthropic/claude-3-7-sonnet-20250219",
+        "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
         # "bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
     ],
 )
@@ -1011,8 +1011,11 @@ def test_anthropic_thinking_output_stream(model):
         reasoning_content_exists = False
         signature_block_exists = False
+        tool_call_exists = False
         for chunk in resp:
             print(f"chunk 2: {chunk}")
+            if chunk.choices[0].delta.tool_calls:
+                tool_call_exists = True
             if (
                 hasattr(chunk.choices[0].delta, "thinking_blocks")
                 and chunk.choices[0].delta.thinking_blocks is not None
@@ -1025,6 +1028,7 @@ def test_anthropic_thinking_output_stream(model):
                 print(chunk.choices[0].delta.thinking_blocks[0])
                 if chunk.choices[0].delta.thinking_blocks[0].get("signature"):
                     signature_block_exists = True
+        assert not tool_call_exists
         assert reasoning_content_exists
         assert signature_block_exists
     except litellm.Timeout: