From d65b7fe01b1737eb75beb4868e93fa1695ce8c09 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 30 May 2024 16:57:11 -0700
Subject: [PATCH] fix(main.py): add logging to audio_transcription calls

---
 litellm/main.py                                    |  2 ++
 .../out/{404.html => 404/index.html}               |  0
 .../{model_hub.html => model_hub/index.html}       |  0
 litellm/proxy/proxy_server.py                      | 28 ++++++++++++++++++-
 litellm/utils.py                                   |  8 ++++++
 5 files changed, 37 insertions(+), 1 deletion(-)
 rename litellm/proxy/_experimental/out/{404.html => 404/index.html} (100%)
 rename litellm/proxy/_experimental/out/{model_hub.html => model_hub/index.html} (100%)

diff --git a/litellm/main.py b/litellm/main.py
index 2a07ae3c88..525a39d689 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4164,6 +4164,7 @@ def transcription(
     return response
 
 
+@client
 async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
     """
     Calls openai tts endpoints.
@@ -4204,6 +4205,7 @@ async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
     )
 
 
+@client
 def speech(
     model: str,
     input: str,
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/404.html
rename to litellm/proxy/_experimental/out/404/index.html
diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/model_hub.html
rename to litellm/proxy/_experimental/out/model_hub/index.html
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e2a3425f26..2bd08fb897 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -5002,13 +5002,39 @@ async def audio_speech(
             },
         )
 
+        ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
+
+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
         # Printing each chunk size
         async def generate(_response: HttpxBinaryResponseContent):
             _generator = await _response.aiter_bytes(chunk_size=1024)
             async for chunk in _generator:
                 yield chunk
 
-        return StreamingResponse(generate(response), media_type="audio/mpeg")
+        custom_headers = get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            model_id=model_id,
+            cache_key=cache_key,
+            api_base=api_base,
+            version=version,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            fastest_response_batch_completion=None,
+        )
+
+        selected_data_generator = select_data_generator(
+            response=response,
+            user_api_key_dict=user_api_key_dict,
+            request_data=data,
+        )
+        return StreamingResponse(
+            generate(response), media_type="audio/mpeg", headers=custom_headers
+        )
 
     except Exception as e:
         traceback.print_exc()
diff --git a/litellm/utils.py b/litellm/utils.py
index 95d9160efa..b48ab9b15a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1136,6 +1136,8 @@ class CallTypes(Enum):
     amoderation = "amoderation"
     atranscription = "atranscription"
     transcription = "transcription"
+    aspeech = "aspeech"
+    speech = "speech"
 
 
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
@@ -3005,6 +3007,10 @@ def function_setup(
         ):
             _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
             messages = "audio_file"
+        elif (
+            call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
+        ):
+            messages = kwargs.get("input", "speech")
         stream = True if "stream" in kwargs and kwargs["stream"] == True else False
         logging_obj = Logging(
             model=model,
@@ -3346,6 +3352,8 @@ def client(original_function):
             return result
         elif "atranscription" in kwargs and kwargs["atranscription"] == True:
             return result
+        elif "aspeech" in kwargs and kwargs["aspeech"] == True:
+            return result
 
         ### POST-CALL RULES ###
         post_call_processing(original_response=result, model=model or None)
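
What the patch enables, in practice: because `speech()` and `aspeech()` are now wrapped by `@client`, they run through `function_setup` like every other call type, so registered success callbacks fire for TTS calls too, with the TTS input text recorded as the logged "messages" payload. A minimal sketch of that behavior, assuming an OPENAI_API_KEY in the environment; the callback signature follows LiteLLM's custom-callback convention, and the model/voice/filename values are only examples:

import litellm

def log_speech_call(kwargs, completion_response, start_time, end_time):
    # Per this patch, function_setup records the TTS input text as the
    # logged "messages" payload (falling back to "speech" when unset).
    print("speech call logged, input was:", kwargs.get("messages"))

litellm.success_callback = [log_speech_call]

# Example values; requires OPENAI_API_KEY in the environment.
response = litellm.speech(model="openai/tts-1", voice="alloy", input="Hello, world!")
response.stream_to_file("speech.mp3")  # HttpxBinaryResponseContent helper

On the proxy side, the same commit makes the /audio/speech endpoint attach the metadata headers built by get_custom_headers (model id, cache key, api base, version, model region) to its StreamingResponse, rather than returning a bare audio/mpeg stream.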