fix(azure/audio_transcriptions.py): support azure cost tracking

Extract the content duration from the SRT/VTT transcription response and log it correctly as `duration`.
2025-04-26 11:14:04 +00:00 · 2025-03-11 22:25:13 -07:00 · 2025-03-11 22:25:13 -07:00 · b8d590da0c
commit b8d590da0c
parent 92d85555fe
5 changed files with 18 additions and 21 deletions
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -573,9 +573,7 @@ def completion_cost(  # noqa: PLR0915
            base_model=base_model,
        )

-        verbose_logger.debug(
-            f"completion_response _select_model_name_for_cost_calc: {model}"
-        )
+        verbose_logger.info(f"selected model name for cost calculation: {model}")

        if completion_response is not None and (
            isinstance(completion_response, BaseModel)
--- a/litellm/llms/azure/audio_transcriptions.py
+++ b/litellm/llms/azure/audio_transcriptions.py
@@ -7,7 +7,11 @@ from pydantic import BaseModel
 import litellm
 from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
 from litellm.types.utils import FileTypes
-from litellm.utils import TranscriptionResponse, convert_to_model_response_object
+from litellm.utils import (
+    TranscriptionResponse,
+    convert_to_model_response_object,
+    extract_duration_from_srt_or_vtt,
+)

 from .azure import (
    AzureChatCompletion,
@@ -156,6 +160,8 @@ class AzureAudioTranscription(AzureChatCompletion):
                stringified_response = response.model_dump()
            else:
                stringified_response = TranscriptionResponse(text=response).model_dump()
+                duration = extract_duration_from_srt_or_vtt(response)
+                stringified_response["duration"] = duration

            ## LOGGING
            logging_obj.post_call(
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ b/litellm/proxy/_experimental/out/onboarding.html
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,17 +1,9 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: whisper
    litellm_params:
-      model: gpt-3.5-turbo
-  - model_name: gpt-4o
-    litellm_params:
-      model: azure/gpt-4o
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
-  - model_name: fake-openai-endpoint-5
-    litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      timeout: 1
-litellm_settings:
-  fallbacks: [{"gpt-3.5-turbo": ["gpt-4o"]}]
+      model: azure/azure-whisper
+      api_version: 2024-02-15-preview
+      api_base: os.environ/AZURE_EUROPE_API_BASE
+      api_key: os.environ/AZURE_EUROPE_API_KEY
+    model_info:
+      mode: audio_transcription
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -947,7 +947,9 @@ def _set_spend_logs_payload(
    spend_logs_url: Optional[str] = None,
 ):
    verbose_proxy_logger.info(
-        "Writing spend log to db - request_id: {}".format(payload.get("request_id"))
+        "Writing spend log to db - request_id: {}, spend: {}".format(
+            payload.get("request_id"), payload.get("spend")
+        )
    )
    if prisma_client is not None and spend_logs_url is not None:
        if isinstance(payload["startTime"], datetime):