diff --git a/.gitignore b/.gitignore
index e8e8aed2b1..6e5f15dbe8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,3 +66,4 @@ litellm/tests/langfuse.log
 litellm/tests/langfuse.log
 litellm/proxy/google-cloud-sdk/*
 tests/llm_translation/log.txt
+venv/
diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py
index c2be42648c..b38c7abbcb 100644
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@@ -1386,7 +1386,7 @@ class AzureChatCompletion(BaseLLM):
             input=input,
             **optional_params,
         )
-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     async def async_audio_speech(
         self,
@@ -1415,14 +1415,14 @@
             client_type="async",
         )  # type: ignore

-        response = await azure_client.audio.speech.create(
+        azure_response = await azure_client.audio.speech.create(
             model=model,
             voice=voice,  # type: ignore
             input=input,
             **optional_params,
         )

-        return response
+        return HttpxBinaryResponseContent(response=azure_response.response)

     def get_headers(
         self,
diff --git a/litellm/llms/azure/files/handler.py b/litellm/llms/azure/files/handler.py
index b299145ad4..fd1ef0d535 100644
--- a/litellm/llms/azure/files/handler.py
+++ b/litellm/llms/azure/files/handler.py
@@ -1,4 +1,4 @@
-from typing import Any, Coroutine, Optional, Union
+from typing import Any, Coroutine, Optional, Union, cast

 import httpx
 from openai import AsyncAzureOpenAI, AzureOpenAI
@@ -111,7 +111,7 @@ class AzureOpenAIFilesAPI(BaseLLM):
         openai_client: AsyncAzureOpenAI,
     ) -> HttpxBinaryResponseContent:
         response = await openai_client.files.content(**file_content_request)
-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     def file_content(
         self,
@@ -152,9 +152,11 @@
                 file_content_request=file_content_request,
                 openai_client=openai_client,
             )
-        response = openai_client.files.content(**file_content_request)
+        response = cast(AzureOpenAI, openai_client).files.content(
+            **file_content_request
+        )

-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     async def aretrieve_file(
         self,
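Note on the recurring pattern above: each handler used to return the `openai` SDK's legacy `HttpxBinaryResponseContent` directly; it now re-wraps the underlying `httpx.Response` (exposed on the SDK object's `.response` attribute) in litellm's own subclass, defined further down in `litellm/types/llms/openai.py`, so litellm can attach `_hidden_params` to what it returns. A minimal sketch of the pattern, assuming only that the `openai` package is installed — the `rewrap` helper name is hypothetical, not part of the patch:

```python
from openai._legacy_response import (
    HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
)


class HttpxBinaryResponseContent(_HttpxBinaryResponseContent):
    """litellm's subclass: same binary-content API, plus room for metadata."""

    _hidden_params: dict = {}


def rewrap(sdk_response: _HttpxBinaryResponseContent) -> HttpxBinaryResponseContent:
    # The SDK object keeps the raw httpx.Response on `.response`; re-wrapping
    # copies no bytes, it only changes the Python type so isinstance checks
    # against the litellm class succeed.
    return HttpxBinaryResponseContent(response=sdk_response.response)
```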
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index 90b642a567..e73c1d55ec 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -1253,13 +1253,13 @@ class OpenAIChatCompletion(BaseLLM):
             client=client,
         )

-        response = openai_client.audio.speech.create(
+        response = cast(OpenAI, openai_client).audio.speech.create(
             model=model,
             voice=voice,  # type: ignore
             input=input,
             **optional_params,
         )
-        return response  # type: ignore
+        return HttpxBinaryResponseContent(response=response.response)

     async def async_audio_speech(
         self,
@@ -1276,13 +1276,16 @@
         client=None,
     ) -> HttpxBinaryResponseContent:

-        openai_client = self._get_openai_client(
-            is_async=True,
-            api_key=api_key,
-            api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-            client=client,
+        openai_client = cast(
+            AsyncOpenAI,
+            self._get_openai_client(
+                is_async=True,
+                api_key=api_key,
+                api_base=api_base,
+                timeout=timeout,
+                max_retries=max_retries,
+                client=client,
+            ),
         )

         response = await openai_client.audio.speech.create(
@@ -1292,7 +1295,7 @@
             **optional_params,
         )

-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     async def ahealth_check(
         self,
@@ -1477,7 +1480,7 @@ class OpenAIFilesAPI(BaseLLM):
         openai_client: AsyncOpenAI,
     ) -> HttpxBinaryResponseContent:
         response = await openai_client.files.content(**file_content_request)
-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     def file_content(
         self,
@@ -1515,9 +1518,9 @@
                 file_content_request=file_content_request,
                 openai_client=openai_client,
             )
-        response = openai_client.files.content(**file_content_request)
+        response = cast(OpenAI, openai_client).files.content(**file_content_request)

-        return response
+        return HttpxBinaryResponseContent(response=response.response)

     async def aretrieve_file(
         self,
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 2a22c10456..3aeccebf60 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -5986,6 +5986,15 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
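For context on the new pricing entry: both directions are priced at $0.00000072 per token, i.e. $0.72 per million tokens. A back-of-the-envelope sketch of what cost tracking should report for a call through this model — the constants are copied from the JSON above, and the token counts are made up for illustration:

```python
# Prices copied from the "meta.llama3-3-70b-instruct-v1:0" entry above.
INPUT_COST_PER_TOKEN = 0.00000072
OUTPUT_COST_PER_TOKEN = 0.00000072


def estimate_cost(input_tokens: int, output_tokens: int) -> float:
    """Linear per-token pricing, as encoded in the context-window JSON."""
    return input_tokens * INPUT_COST_PER_TOKEN + output_tokens * OUTPUT_COST_PER_TOKEN


# A hypothetical 1,000-token prompt with a 200-token completion:
# 1000 * 0.00000072 + 200 * 0.00000072 = 0.000864 USD
print(f"${estimate_cost(1000, 200):.6f}")  # -> $0.000864
```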
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index a46e8d1eaf..a556dd1e2c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -12,12 +12,6 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       num_retries: 3
-  - model_name: azure-tts
-    litellm_params:
-      model: azure/tts-1
-      api_key: "6a4ULQtTSBZi5TONpzGP2GkKDUSSReHFQYwjMpZaywNxY03FsplGJQQJ99ALACHrzpqXJ3w3AAAAACOGUsjy"
-      api_base: "https://krris-m4rve6fd-northcentralus.cognitiveservices.azure.com/openai/deployments/tts"
-      api_version: "2024-05-01-preview"

 litellm_settings:
-  success_callback: ["langfuse"]
\ No newline at end of file
+  success_callback: ["langfuse"]
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
index 355a476d47..3240bfb8a3 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -1,7 +1,7 @@
 import json
 import secrets
 from datetime import datetime as dt
-from typing import Optional
+from typing import Optional, cast

 from pydantic import BaseModel

@@ -40,7 +40,9 @@ def get_logging_payload(
     if kwargs is None:
         kwargs = {}

-    if response_obj is None:
+    if response_obj is None or (
+        not isinstance(response_obj, BaseModel) and not isinstance(response_obj, dict)
+    ):
         response_obj = {}
     # standardize this function to be used across, s3, dynamoDB, langfuse logging
     litellm_params = kwargs.get("litellm_params", {})
@@ -50,10 +52,10 @@ def get_logging_payload(
     completion_start_time = kwargs.get("completion_start_time", end_time)
     call_type = kwargs.get("call_type")
     cache_hit = kwargs.get("cache_hit", False)
-    usage = response_obj.get("usage", None) or {}
+    usage = cast(dict, response_obj).get("usage", None) or {}
     if isinstance(usage, litellm.Usage):
         usage = dict(usage)
-    id = response_obj.get("id") or kwargs.get("litellm_call_id")
+    id = cast(dict, response_obj).get("id") or kwargs.get("litellm_call_id")
     api_key = metadata.get("user_api_key", "")
     if api_key is not None and isinstance(api_key, str):
         if api_key.startswith("sk-"):
diff --git a/litellm/rerank_api/main.py b/litellm/rerank_api/main.py
index 57a353adbe..315109280c 100644
--- a/litellm/rerank_api/main.py
+++ b/litellm/rerank_api/main.py
@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
 from litellm.llms.jina_ai.rerank.handler import JinaAIRerank
 from litellm.llms.together_ai.rerank.handler import TogetherAIRerank
 from litellm.rerank_api.rerank_utils import get_optional_rerank_params
-from litellm.secret_managers.main import get_secret
+from litellm.secret_managers.main import get_secret, get_secret_str
 from litellm.types.rerank import OptionalRerankParams, RerankResponse
 from litellm.types.router import *
 from litellm.utils import ProviderConfigManager, client, exception_type
@@ -211,7 +211,7 @@ def rerank(  # noqa: PLR0915
             dynamic_api_base
             or optional_params.api_base
             or litellm.api_base
-            or get_secret("INFINITY_API_BASE")  # type: ignore
+            or get_secret_str("INFINITY_API_BASE")
         )

         if api_base is None:
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 26c0eab3a0..e544cf24d2 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,7 +1,9 @@
 from os import PathLike
 from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union

-from openai._legacy_response import HttpxBinaryResponseContent
+from openai._legacy_response import (
+    HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
+)
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
@@ -48,6 +50,11 @@ FileTypes = Union[
 EmbeddingInput = Union[str, List[str]]


+class HttpxBinaryResponseContent(_HttpxBinaryResponseContent):
+    _hidden_params: dict = {}
+    pass
+
+
 class NotGiven:
     """
     A sentinel singleton class used to distinguish omitted keyword arguments
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 2a22c10456..3aeccebf60 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -5986,6 +5986,15 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py
index bb7b38addc..d7a76e8542 100644
--- a/tests/local_testing/test_amazing_vertex_completion.py
+++ b/tests/local_testing/test_amazing_vertex_completion.py
@@ -210,7 +210,7 @@ async def test_get_router_response():
 #     reason="Local test. Vertex AI Quota is low. Leads to rate limit errors on ci/cd."
 # )
 # @pytest.mark.flaky(retries=3, delay=1)
-def test_vertex_ai_anthropic():
+def test_aavertex_ai_anthropic():
     model = "claude-3-sonnet@20240229"

     vertex_ai_project = "adroit-crow-413218"
diff --git a/tests/local_testing/test_audio_speech.py b/tests/local_testing/test_audio_speech.py
index 9e60add17a..da8872a785 100644
--- a/tests/local_testing/test_audio_speech.py
+++ b/tests/local_testing/test_audio_speech.py
@@ -61,7 +61,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
             optional_params={},
         )

-        from litellm.llms.openai.openai import HttpxBinaryResponseContent
+        from litellm.types.llms.openai import HttpxBinaryResponseContent

         assert isinstance(response, HttpxBinaryResponseContent)
     else:
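End to end, the change is visible from the public API: after this patch, `litellm.speech()` returns litellm's `HttpxBinaryResponseContent` rather than the SDK's legacy type, which is exactly what the updated test asserts against the new import path. A minimal usage sketch, assuming a valid `OPENAI_API_KEY` in the environment and that `tts-1` is available on the account:

```python
import litellm
from litellm.types.llms.openai import HttpxBinaryResponseContent

# speech() wraps the provider's audio.speech.create() call; with this patch
# the return value is litellm's own response type for both sync and async.
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="Hello from litellm",
)
assert isinstance(response, HttpxBinaryResponseContent)

# The binary-content helpers from the openai SDK are inherited unchanged.
response.stream_to_file("speech.mp3")
```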