Litellm dev 2024 12 20 p1 (#7335)

* fix(utils.py): e2e azure tts cost tracking working

moves tts response obj to include hidden params (allows for litellm call id, etc. to be sent in response headers); fixes spend_tracking_utils logging payload to account for non-base model use-case

Fixes https://github.com/BerriAI/litellm/issues/7223

* fix: fix linting errors

* build(model_prices_and_context_window.json): add bedrock llama 3.3

Closes https://github.com/BerriAI/litellm/issues/7329

* fix(openai.py): fix return type for sync openai httpx response

* test: update test

* fix(spend_tracking_utils.py): fix if check

* fix(spend_tracking_utils.py): fix if check

* test: improve debugging for test

* fix: fix import
This commit is contained in:
Krish Dholakia 2024-12-20 21:22:31 -08:00 committed by GitHub
parent 522da384b6
commit 404bf2974b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 63 additions and 36 deletions

View file

@@ -1253,13 +1253,13 @@ class OpenAIChatCompletion(BaseLLM):
client=client,
)
response = openai_client.audio.speech.create(
response = cast(OpenAI, openai_client).audio.speech.create(
model=model,
voice=voice, # type: ignore
input=input,
**optional_params,
)
return response # type: ignore
return HttpxBinaryResponseContent(response=response.response)
async def async_audio_speech(
self,
@@ -1276,13 +1276,16 @@ class OpenAIChatCompletion(BaseLLM):
client=None,
) -> HttpxBinaryResponseContent:
openai_client = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
openai_client = cast(
AsyncOpenAI,
self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
),
)
response = await openai_client.audio.speech.create(
@@ -1292,7 +1295,7 @@ class OpenAIChatCompletion(BaseLLM):
**optional_params,
)
return response
return HttpxBinaryResponseContent(response=response.response)
async def ahealth_check(
self,
@@ -1477,7 +1480,7 @@ class OpenAIFilesAPI(BaseLLM):
openai_client: AsyncOpenAI,
) -> HttpxBinaryResponseContent:
response = await openai_client.files.content(**file_content_request)
return response
return HttpxBinaryResponseContent(response=response.response)
def file_content(
self,
@@ -1515,9 +1518,9 @@ class OpenAIFilesAPI(BaseLLM):
file_content_request=file_content_request,
openai_client=openai_client,
)
response = openai_client.files.content(**file_content_request)
response = cast(OpenAI, openai_client).files.content(**file_content_request)
return response
return HttpxBinaryResponseContent(response=response.response)
async def aretrieve_file(
self,