Litellm dev 2024 12 20 p1 (#7335)

* fix(utils.py): e2e azure tts cost tracking working

moves the tts response obj to include hidden params (allows litellm call id, etc. to be sent in response headers); fixes the spend_tracking_utils logging payload to account for the non-base-model use-case

Fixes https://github.com/BerriAI/litellm/issues/7223
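A minimal sketch of how the change is meant to surface downstream, assuming the wrapped speech response exposes litellm's usual `_hidden_params` dict (the attribute and deployment names below are assumptions, not taken from this diff):

```python
# Hypothetical usage sketch -- `_hidden_params` and the model name are
# assumptions based on litellm's other response types, not this diff.
import litellm

response = litellm.speech(
    model="azure/tts-1",  # assumed Azure TTS deployment alias
    voice="alloy",
    input="cost tracking test",
)

# With the response wrapped in litellm's own object, per-call metadata
# (e.g. the litellm call id used for spend tracking) can ride along with it.
print(getattr(response, "_hidden_params", {}))

# The binary audio content itself should still be accessible as before.
with open("speech.mp3", "wb") as f:
    f.write(response.content)
```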

* fix: fix linting errors

* build(model_prices_and_context_window.json): add bedrock llama 3.3

Closes https://github.com/BerriAI/litellm/issues/7329
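As a rough illustration of what the pricing-map addition enables, the new entry can be read back through litellm's model-info helper; the exact Bedrock model id below is an assumption, not copied from the JSON diff:

```python
# Hypothetical lookup sketch -- the model id is an assumption; check
# model_prices_and_context_window.json for the exact key that was added.
import litellm

info = litellm.get_model_info("bedrock/meta.llama3-3-70b-instruct-v1:0")
print(info["max_input_tokens"], info["input_cost_per_token"])
```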

* fix(openai.py): fix return type for sync openai httpx response

* test: update test

* fix(spend_tracking_utils.py): fix if check

* fix(spend_tracking_utils.py): fix if check

* test: improve debugging for test

* fix: fix import
Krish Dholakia, 2024-12-20 21:22:31 -08:00, committed by GitHub
parent 522da384b6
commit 404bf2974b
12 changed files with 63 additions and 36 deletions


@@ -1386,7 +1386,7 @@ class AzureChatCompletion(BaseLLM):
             input=input,
             **optional_params,
         )
-        return response
+        return HttpxBinaryResponseContent(response=response.response)
 
     async def async_audio_speech(
         self,
@@ -1415,14 +1415,14 @@ class AzureChatCompletion(BaseLLM):
             client_type="async",
         )  # type: ignore
 
-        response = await azure_client.audio.speech.create(
+        azure_response = await azure_client.audio.speech.create(
             model=model,
             voice=voice,  # type: ignore
             input=input,
             **optional_params,
         )
 
-        return response
+        return HttpxBinaryResponseContent(response=azure_response.response)
 
     def get_headers(
         self,
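The pattern in both hunks is the same: instead of handing back the OpenAI SDK's binary response object, the underlying `httpx.Response` is re-wrapped in litellm's own `HttpxBinaryResponseContent`, an object litellm controls and can decorate with call metadata. A rough sketch of the idea; the import path and the `_hidden_params` attachment are assumptions about how the metadata lands, not something shown in these hunks:

```python
# Sketch of the wrapping pattern above; the import path and `_hidden_params`
# usage are assumptions, not taken from this diff.
import httpx
from litellm.types.llms.openai import HttpxBinaryResponseContent


def wrap_speech_response(raw: httpx.Response, litellm_call_id: str) -> HttpxBinaryResponseContent:
    # Re-wrap the raw httpx.Response in litellm's own binary-response type so
    # downstream code (spend tracking, response headers) gets a litellm object.
    wrapped = HttpxBinaryResponseContent(response=raw)
    # Hypothetical: attach per-call metadata for cost tracking / headers.
    wrapped._hidden_params = {"litellm_call_id": litellm_call_id}
    return wrapped
```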