diff --git a/.circleci/config.yml b/.circleci/config.yml index 5e0431e480..c31a07a22b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -817,6 +817,7 @@ jobs: - run: python ./tests/documentation_tests/test_api_docs.py - run: python ./tests/code_coverage_tests/ensure_async_clients_test.py - run: python ./tests/code_coverage_tests/enforce_llms_folder_style.py + - run: python ./tests/documentation_tests/test_circular_imports.py - run: helm lint ./deploy/charts/litellm-helm db_migration_disable_update_check: diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py index 414f3c4ddf..50ccccfde4 100644 --- a/enterprise/enterprise_hooks/secret_detection.py +++ b/enterprise/enterprise_hooks/secret_detection.py @@ -474,12 +474,9 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail): from detect_secrets import SecretsCollection from detect_secrets.settings import default_settings - print("INSIDE SECRET DETECTION PRE-CALL HOOK!") - if await self.should_run_check(user_api_key_dict) is False: return - print("RUNNING CHECK!") if "messages" in data and isinstance(data["messages"], list): for message in data["messages"]: if "content" in message and isinstance(message["content"], str): diff --git a/litellm/__init__.py b/litellm/__init__.py index 2d2b66795d..b8d722ad99 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -32,7 +32,7 @@ from litellm.proxy._types import ( KeyManagementSettings, LiteLLM_UpperboundKeyGenerateParams, ) -from litellm.types.utils import StandardKeyGenerationConfig +from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders import httpx import dotenv from enum import Enum @@ -838,71 +838,6 @@ model_list = ( ) -class LlmProviders(str, Enum): - OPENAI = "openai" - OPENAI_LIKE = "openai_like" # embedding only - JINA_AI = "jina_ai" - XAI = "xai" - CUSTOM_OPENAI = "custom_openai" - TEXT_COMPLETION_OPENAI = "text-completion-openai" - COHERE = "cohere" - COHERE_CHAT = "cohere_chat" - CLARIFAI = "clarifai" - ANTHROPIC = "anthropic" - ANTHROPIC_TEXT = "anthropic_text" - REPLICATE = "replicate" - HUGGINGFACE = "huggingface" - TOGETHER_AI = "together_ai" - OPENROUTER = "openrouter" - VERTEX_AI = "vertex_ai" - VERTEX_AI_BETA = "vertex_ai_beta" - GEMINI = "gemini" - AI21 = "ai21" - BASETEN = "baseten" - AZURE = "azure" - AZURE_TEXT = "azure_text" - AZURE_AI = "azure_ai" - SAGEMAKER = "sagemaker" - SAGEMAKER_CHAT = "sagemaker_chat" - BEDROCK = "bedrock" - VLLM = "vllm" - NLP_CLOUD = "nlp_cloud" - PETALS = "petals" - OOBABOOGA = "oobabooga" - OLLAMA = "ollama" - OLLAMA_CHAT = "ollama_chat" - DEEPINFRA = "deepinfra" - PERPLEXITY = "perplexity" - MISTRAL = "mistral" - GROQ = "groq" - NVIDIA_NIM = "nvidia_nim" - CEREBRAS = "cerebras" - AI21_CHAT = "ai21_chat" - VOLCENGINE = "volcengine" - CODESTRAL = "codestral" - TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" - DEEPSEEK = "deepseek" - SAMBANOVA = "sambanova" - MARITALK = "maritalk" - VOYAGE = "voyage" - CLOUDFLARE = "cloudflare" - XINFERENCE = "xinference" - FIREWORKS_AI = "fireworks_ai" - FRIENDLIAI = "friendliai" - WATSONX = "watsonx" - WATSONX_TEXT = "watsonx_text" - TRITON = "triton" - PREDIBASE = "predibase" - DATABRICKS = "databricks" - EMPOWER = "empower" - GITHUB = "github" - CUSTOM = "custom" - LITELLM_PROXY = "litellm_proxy" - HOSTED_VLLM = "hosted_vllm" - LM_STUDIO = "lm_studio" - GALADRIEL = "galadriel" - - provider_list: List[Union[LlmProviders, str]] = list(LlmProviders) diff --git a/litellm/adapters/anthropic_adapter.py 
b/litellm/adapters/anthropic_adapter.py index 47fba36309..b8ce225ada 100644 --- a/litellm/adapters/anthropic_adapter.py +++ b/litellm/adapters/anthropic_adapter.py @@ -18,7 +18,7 @@ from litellm.types.llms.anthropic import ( AnthropicResponse, ContentBlockDelta, ) -from litellm.types.utils import AdapterCompletionStreamWrapper +from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse class AnthropicAdapter(CustomLogger): @@ -41,7 +41,7 @@ class AnthropicAdapter(CustomLogger): return translated_body def translate_completion_output_params( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> Optional[AnthropicResponse]: return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic( diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 4f6bf5c19c..2e6edb7571 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -484,7 +484,7 @@ def completion_cost( # noqa: PLR0915 completion_characters: Optional[int] = None cache_creation_input_tokens: Optional[int] = None cache_read_input_tokens: Optional[int] = None - cost_per_token_usage_object: Optional[litellm.Usage] = _get_usage_object( + cost_per_token_usage_object: Optional[Usage] = _get_usage_object( completion_response=completion_response ) if completion_response is not None and ( @@ -492,7 +492,7 @@ def completion_cost( # noqa: PLR0915 or isinstance(completion_response, dict) ): # tts returns a custom class - usage_obj: Optional[Union[dict, litellm.Usage]] = completion_response.get( # type: ignore + usage_obj: Optional[Union[dict, Usage]] = completion_response.get( # type: ignore "usage", {} ) if isinstance(usage_obj, BaseModel) and not isinstance( diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index d585e235b7..bd3c3b8253 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -39,6 +39,7 @@ from litellm.proxy._types import ( VirtualKeyEvent, WebhookEvent, ) +from litellm.router import Router from litellm.types.integrations.slack_alerting import * from litellm.types.router import LiteLLM_Params @@ -93,7 +94,7 @@ class SlackAlerting(CustomBatchLogger): alert_types: Optional[List[AlertType]] = None, alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None, alerting_args: Optional[Dict] = None, - llm_router: Optional[litellm.Router] = None, + llm_router: Optional[Router] = None, ): if alerting is not None: self.alerting = alerting diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index d534d4da3b..23ebb6ccd5 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -18,6 +18,7 @@ from pydantic import BaseModel import litellm from litellm import ( + _custom_logger_compatible_callbacks_literal, json_logs, log_raw_request_response, turn_off_message_logging, @@ -41,6 +42,7 @@ from litellm.types.utils import ( CallTypes, EmbeddingResponse, ImageResponse, + LiteLLMLoggingBaseClass, ModelResponse, StandardCallbackDynamicParams, StandardLoggingAdditionalHeaders, @@ -190,7 +192,7 @@ in_memory_trace_id_cache = ServiceTraceIDCache() in_memory_dynamic_logger_cache = DynamicLoggingCache() -class Logging: +class Logging(LiteLLMLoggingBaseClass): global supabaseClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, 
logfireLogger, prometheusLogger, slack_app custom_pricing: bool = False stream_options = None @@ -2142,7 +2144,7 @@ def set_callbacks(callback_list, function_id=None): # noqa: PLR0915 def _init_custom_logger_compatible_class( # noqa: PLR0915 - logging_integration: litellm._custom_logger_compatible_callbacks_literal, + logging_integration: _custom_logger_compatible_callbacks_literal, internal_usage_cache: Optional[DualCache], llm_router: Optional[ Any @@ -2362,7 +2364,7 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 def get_custom_logger_compatible_class( # noqa: PLR0915 - logging_integration: litellm._custom_logger_compatible_callbacks_literal, + logging_integration: _custom_logger_compatible_callbacks_literal, ) -> Optional[CustomLogger]: if logging_integration == "lago": for callback in _in_memory_loggers: diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 9877c683c9..79933a462e 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -13,7 +13,6 @@ from jinja2.sandbox import ImmutableSandboxedEnvironment import litellm import litellm.types import litellm.types.llms -import litellm.types.llms.vertex_ai from litellm import verbose_logger from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.types.completion import ( @@ -40,6 +39,9 @@ from litellm.types.llms.openai import ( ChatCompletionUserMessage, OpenAIMessageContentListBlock, ) +from litellm.types.llms.vertex_ai import FunctionCall as VertexFunctionCall +from litellm.types.llms.vertex_ai import FunctionResponse as VertexFunctionResponse +from litellm.types.llms.vertex_ai import PartType as VertexPartType from litellm.types.utils import GenericImageParsingChunk from .common_utils import convert_content_list_to_str, is_non_content_values_set @@ -965,11 +967,11 @@ def infer_protocol_value( def _gemini_tool_call_invoke_helper( function_call_params: ChatCompletionToolCallFunctionChunk, -) -> Optional[litellm.types.llms.vertex_ai.FunctionCall]: +) -> Optional[VertexFunctionCall]: name = function_call_params.get("name", "") or "" arguments = function_call_params.get("arguments", "") arguments_dict = json.loads(arguments) - function_call = litellm.types.llms.vertex_ai.FunctionCall( + function_call = VertexFunctionCall( name=name, args=arguments_dict, ) @@ -978,7 +980,7 @@ def _gemini_tool_call_invoke_helper( def convert_to_gemini_tool_call_invoke( message: ChatCompletionAssistantMessage, -) -> List[litellm.types.llms.vertex_ai.PartType]: +) -> List[VertexPartType]: """ OpenAI tool invokes: { @@ -1019,22 +1021,20 @@ def convert_to_gemini_tool_call_invoke( - json.load the arguments """ try: - _parts_list: List[litellm.types.llms.vertex_ai.PartType] = [] + _parts_list: List[VertexPartType] = [] tool_calls = message.get("tool_calls", None) function_call = message.get("function_call", None) if tool_calls is not None: for tool in tool_calls: if "function" in tool: - gemini_function_call: Optional[ - litellm.types.llms.vertex_ai.FunctionCall - ] = _gemini_tool_call_invoke_helper( - function_call_params=tool["function"] + gemini_function_call: Optional[VertexFunctionCall] = ( + _gemini_tool_call_invoke_helper( + function_call_params=tool["function"] + ) ) if gemini_function_call is not None: _parts_list.append( - litellm.types.llms.vertex_ai.PartType( - function_call=gemini_function_call - ) + VertexPartType(function_call=gemini_function_call) ) else: # don't 
silently drop params. Make it clear to user what's happening. raise Exception( @@ -1047,11 +1047,7 @@ def convert_to_gemini_tool_call_invoke( function_call_params=function_call ) if gemini_function_call is not None: - _parts_list.append( - litellm.types.llms.vertex_ai.PartType( - function_call=gemini_function_call - ) - ) + _parts_list.append(VertexPartType(function_call=gemini_function_call)) else: # don't silently drop params. Make it clear to user what's happening. raise Exception( "function_call missing. Received tool call with 'type': 'function'. No function call in argument - {}".format( @@ -1070,7 +1066,7 @@ def convert_to_gemini_tool_call_invoke( def convert_to_gemini_tool_call_result( message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage], last_message_with_tool_calls: Optional[dict], -) -> litellm.types.llms.vertex_ai.PartType: +) -> VertexPartType: """ OpenAI message with a tool result looks like: { @@ -1119,11 +1115,11 @@ def convert_to_gemini_tool_call_result( # We can't determine from openai message format whether it's a successful or # error call result so default to the successful result template - _function_response = litellm.types.llms.vertex_ai.FunctionResponse( + _function_response = VertexFunctionResponse( name=name, response={"content": content_str} # type: ignore ) - _part = litellm.types.llms.vertex_ai.PartType(function_response=_function_response) + _part = VertexPartType(function_response=_function_response) return _part diff --git a/litellm/llms/anthropic/experimental_pass_through/transformation.py b/litellm/llms/anthropic/experimental_pass_through/transformation.py index 5de232e1eb..7880827cce 100644 --- a/litellm/llms/anthropic/experimental_pass_through/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/transformation.py @@ -5,6 +5,12 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice import litellm +from litellm.litellm_core_utils.prompt_templates.factory import ( + anthropic_messages_pt, + custom_prompt, + prompt_factory, +) +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.types.llms.anthropic import ( AllAnthropicToolsValues, AnthopicMessagesAssistantMessageParam, @@ -53,15 +59,9 @@ from litellm.types.llms.openai import ( ChatCompletionUserMessage, OpenAIMessageContent, ) -from litellm.types.utils import Choices, GenericStreamingChunk -from litellm.utils import CustomStreamWrapper, ModelResponse, Usage +from litellm.types.utils import Choices, GenericStreamingChunk, ModelResponse, Usage from ...base import BaseLLM -from litellm.litellm_core_utils.prompt_templates.factory import ( - anthropic_messages_pt, - custom_prompt, - prompt_factory, -) class AnthropicExperimentalPassThroughConfig: @@ -338,7 +338,7 @@ class AnthropicExperimentalPassThroughConfig: return "end_turn" def translate_openai_response_to_anthropic( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> AnthropicResponse: ## translate content block anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore @@ -347,7 +347,7 @@ class AnthropicExperimentalPassThroughConfig: openai_finish_reason=response.choices[0].finish_reason # type: ignore ) # extract usage - usage: litellm.Usage = getattr(response, "usage") + usage: Usage = getattr(response, "usage") anthropic_usage = AnthropicResponseUsageBlock( input_tokens=usage.prompt_tokens or 0, 
output_tokens=usage.completion_tokens or 0, @@ -393,7 +393,7 @@ class AnthropicExperimentalPassThroughConfig: return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) def translate_streaming_openai_response_to_anthropic( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> Union[ContentBlockDelta, MessageBlockDelta]: ## base case - final chunk w/ finish reason if response.choices[0].finish_reason is not None: @@ -403,7 +403,7 @@ class AnthropicExperimentalPassThroughConfig: ), ) if getattr(response, "usage", None) is not None: - litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore + litellm_usage_chunk: Optional[Usage] = response.usage # type: ignore elif ( hasattr(response, "_hidden_params") and "usage" in response._hidden_params diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py index 33261a7a7e..ffef8007e6 100644 --- a/litellm/llms/azure/azure.py +++ b/litellm/llms/azure/azure.py @@ -17,10 +17,14 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.types.utils import EmbeddingResponse +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + LlmProviders, + ModelResponse, +) from litellm.utils import ( CustomStreamWrapper, - ModelResponse, UnsupportedParamsError, convert_to_model_response_object, get_secret, @@ -853,7 +857,7 @@ class AzureChatCompletion(BaseLLM): client=None, aembedding=None, headers: Optional[dict] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: if headers: optional_params["extra_headers"] = headers if self._client_session is None: @@ -963,7 +967,7 @@ class AzureChatCompletion(BaseLLM): _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0) async_handler = get_async_httpx_client( - llm_provider=litellm.LlmProviders.AZURE, + llm_provider=LlmProviders.AZURE, params=_params, ) else: @@ -1242,11 +1246,11 @@ class AzureChatCompletion(BaseLLM): api_key: Optional[str] = None, api_base: Optional[str] = None, api_version: Optional[str] = None, - model_response: Optional[litellm.utils.ImageResponse] = None, + model_response: Optional[ImageResponse] = None, azure_ad_token: Optional[str] = None, client=None, aimg_generation=None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: try: if model and len(model) > 0: model = model @@ -1510,7 +1514,7 @@ class AzureChatCompletion(BaseLLM): ) -> dict: client_session = ( litellm.aclient_session - or get_async_httpx_client(llm_provider=litellm.LlmProviders.AZURE).client + or get_async_httpx_client(llm_provider=LlmProviders.AZURE).client ) # handle dall-e-2 calls if "gateway.ai.cloudflare.com" in api_base: diff --git a/litellm/llms/azure/chat/gpt_transformation.py b/litellm/llms/azure/chat/gpt_transformation.py index bd737af2fb..5af1a675aa 100644 --- a/litellm/llms/azure/chat/gpt_transformation.py +++ b/litellm/llms/azure/chat/gpt_transformation.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING, Any, List, Optional, Type, Union from httpx._models import Headers, Response import litellm +from litellm.litellm_core_utils.prompt_templates.factory import ( + convert_to_azure_openai_messages, +) from litellm.llms.base_llm.transformation import BaseLLMException +from litellm.types.utils import ModelResponse from ....exceptions import UnsupportedParamsError from ....types.llms.openai import ( @@ -14,9 +18,7 @@ from ....types.llms.openai import ( ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk, ) - from ...base_llm.transformation import BaseConfig 
-from litellm.litellm_core_utils.prompt_templates.factory import convert_to_azure_openai_messages from ..common_utils import AzureOpenAIError if TYPE_CHECKING: @@ -26,6 +28,7 @@ if TYPE_CHECKING: else: LoggingClass = Any + class AzureOpenAIConfig(BaseConfig): """ Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions @@ -221,7 +224,7 @@ class AzureOpenAIConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LoggingClass, request_data: dict, messages: List[AllMessageValues], @@ -230,7 +233,7 @@ class AzureOpenAIConfig(BaseConfig): encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: raise NotImplementedError( "Azure OpenAI handler.py has custom logic for transforming response, as it uses the OpenAI SDK." ) diff --git a/litellm/llms/azure_ai/embed/handler.py b/litellm/llms/azure_ai/embed/handler.py index 5b86f0b255..f5c3a1a6b5 100644 --- a/litellm/llms/azure_ai/embed/handler.py +++ b/litellm/llms/azure_ai/embed/handler.py @@ -89,7 +89,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): embedding_response = response.json() embedding_headers = dict(response.headers) - returned_response: litellm.EmbeddingResponse = convert_to_model_response_object( # type: ignore + returned_response: EmbeddingResponse = convert_to_model_response_object( # type: ignore response_object=embedding_response, model_response_object=model_response, response_type="embedding", @@ -104,7 +104,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): data: ImageEmbeddingRequest, timeout: float, logging_obj, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str], api_base: Optional[str], @@ -132,7 +132,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): embedding_response = response.json() embedding_headers = dict(response.headers) - returned_response: litellm.EmbeddingResponse = convert_to_model_response_object( # type: ignore + returned_response: EmbeddingResponse = convert_to_model_response_object( # type: ignore response_object=embedding_response, model_response_object=model_response, response_type="embedding", @@ -213,14 +213,14 @@ class AzureAIEmbedding(OpenAIChatCompletion): input: List, timeout: float, logging_obj, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str] = None, api_base: Optional[str] = None, client=None, aembedding=None, max_retries: Optional[int] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: """ - Separate image url from text -> route image url call to `/image/embeddings` diff --git a/litellm/llms/base.py b/litellm/llms/base.py index 943b10182b..611b0aa902 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -5,6 +5,8 @@ import httpx import requests import litellm +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper +from litellm.types.utils import ModelResponse, TextCompletionResponse class BaseLLM: @@ -15,7 +17,7 @@ class BaseLLM: self, model: str, response: Union[requests.Response, httpx.Response], - model_response: litellm.utils.ModelResponse, + model_response: ModelResponse, stream: bool, logging_obj: Any, optional_params: dict, @@ -24,7 +26,7 @@ class BaseLLM: messages: list, print_verbose, encoding, - ) -> Union[litellm.utils.ModelResponse, litellm.utils.CustomStreamWrapper]: + ) -> 
Union[ModelResponse, CustomStreamWrapper]: """ Helper function to process the response across sync + async completion calls """ @@ -34,7 +36,7 @@ class BaseLLM: self, model: str, response: Union[requests.Response, httpx.Response], - model_response: litellm.utils.TextCompletionResponse, + model_response: TextCompletionResponse, stream: bool, logging_obj: Any, optional_params: dict, @@ -43,7 +45,7 @@ class BaseLLM: messages: list, print_verbose, encoding, - ) -> Union[litellm.utils.TextCompletionResponse, litellm.utils.CustomStreamWrapper]: + ) -> Union[TextCompletionResponse, CustomStreamWrapper]: """ Helper function to process the response across sync + async completion calls """ diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py index a2a8228e27..963e3fca59 100644 --- a/litellm/llms/bedrock/chat/invoke_handler.py +++ b/litellm/llms/bedrock/chat/invoke_handler.py @@ -32,6 +32,17 @@ from litellm import verbose_logger from litellm.caching.caching import InMemoryCache from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.litellm_logging import Logging +from litellm.litellm_core_utils.prompt_templates.factory import ( + _bedrock_converse_messages_pt, + _bedrock_tools_pt, + cohere_message_pt, + construct_tool_use_system_prompt, + contains_tag, + custom_prompt, + extract_between_tags, + parse_xml_params, + prompt_factory, +) from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -50,20 +61,10 @@ from litellm.types.llms.openai import ( ChatCompletionUsageBlock, ) from litellm.types.utils import GenericStreamingChunk as GChunk -from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret +from litellm.types.utils import ModelResponse, Usage +from litellm.utils import CustomStreamWrapper, get_secret from ..base_aws_llm import BaseAWSLLM -from litellm.litellm_core_utils.prompt_templates.factory import ( - _bedrock_converse_messages_pt, - _bedrock_tools_pt, - cohere_message_pt, - construct_tool_use_system_prompt, - contains_tag, - custom_prompt, - extract_between_tags, - parse_xml_params, - prompt_factory, -) from ..common_utils import BedrockError, ModelResponseIterator, get_bedrock_tool_name from .converse_transformation import AmazonConverseConfig @@ -1317,7 +1318,7 @@ class MockResponseIterator: # for returning ai21 streaming responses def _chunk_parser(self, chunk_data: ModelResponse) -> GChunk: try: - chunk_usage: litellm.Usage = getattr(chunk_data, "usage") + chunk_usage: Usage = getattr(chunk_data, "usage") text = chunk_data.choices[0].message.content or "" # type: ignore tool_use = None if self.json_mode is True: diff --git a/litellm/llms/clarifai/chat/transformation.py b/litellm/llms/clarifai/chat/transformation.py index 53ddfcdfa6..fac16f7ca6 100644 --- a/litellm/llms/clarifai/chat/transformation.py +++ b/litellm/llms/clarifai/chat/transformation.py @@ -5,9 +5,11 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, import httpx import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + convert_content_list_to_str, +) from litellm.llms.base_llm.base_model_iterator import FakeStreamResponseIterator from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.common_utils import convert_content_list_to_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ( 
ChatCompletionToolCallChunk, @@ -152,7 +154,7 @@ class ClarifaiConfig(BaseConfig): encoding: str, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: logging_obj.post_call( input=messages, api_key=api_key, diff --git a/litellm/llms/codestral/completion/handler.py b/litellm/llms/codestral/completion/handler.py index 43bbafaefe..e04da501bf 100644 --- a/litellm/llms/codestral/completion/handler.py +++ b/litellm/llms/codestral/completion/handler.py @@ -29,6 +29,7 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig from litellm.types.llms.databricks import GenericStreamingChunk +from litellm.types.utils import TextChoices from litellm.utils import ( Choices, CustomStreamWrapper, @@ -169,7 +170,7 @@ class CodestralTextCompletion(BaseLLM): raise TextCompletionCodestralError(message=response.text, status_code=422) _original_choices = completion_response.get("choices", []) - _choices: List[litellm.utils.TextChoices] = [] + _choices: List[TextChoices] = [] for choice in _original_choices: # This is what 1 choice looks like from codestral API # { diff --git a/litellm/llms/cohere/embed/handler.py b/litellm/llms/cohere/embed/handler.py index afeba10b58..5258df2b7f 100644 --- a/litellm/llms/cohere/embed/handler.py +++ b/litellm/llms/cohere/embed/handler.py @@ -17,6 +17,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, ) from litellm.types.llms.bedrock import CohereEmbeddingRequest +from litellm.types.utils import EmbeddingResponse from litellm.utils import Choices, Message, ModelResponse, Usage from .transformation import CohereEmbeddingConfig @@ -118,7 +119,7 @@ async def async_embedding( def embedding( model: str, input: list, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, logging_obj: LiteLLMLoggingObj, optional_params: dict, headers: dict, diff --git a/litellm/llms/databricks/embed/handler.py b/litellm/llms/databricks/embed/handler.py index 4ed5853762..284988ffec 100644 --- a/litellm/llms/databricks/embed/handler.py +++ b/litellm/llms/databricks/embed/handler.py @@ -21,7 +21,7 @@ class DatabricksEmbeddingHandler(OpenAILikeEmbeddingHandler, DatabricksBase): api_key: Optional[str], api_base: Optional[str], optional_params: dict, - model_response: Optional[litellm.utils.EmbeddingResponse] = None, + model_response: Optional[EmbeddingResponse] = None, client=None, aembedding=None, custom_endpoint: Optional[bool] = None, diff --git a/litellm/llms/databricks/streaming_utils.py b/litellm/llms/databricks/streaming_utils.py index 502f4a0912..b9f54c04dd 100644 --- a/litellm/llms/databricks/streaming_utils.py +++ b/litellm/llms/databricks/streaming_utils.py @@ -55,9 +55,7 @@ class ModelResponseIterator: is_finished = True finish_reason = processed_chunk.choices[0].finish_reason - usage_chunk: Optional[litellm.Usage] = getattr( - processed_chunk, "usage", None - ) + usage_chunk: Optional[Usage] = getattr(processed_chunk, "usage", None) if usage_chunk is not None: usage = ChatCompletionUsageBlock( diff --git a/litellm/llms/huggingface/chat/handler.py b/litellm/llms/huggingface/chat/handler.py index 9ed841e379..eadb62fb30 100644 --- a/litellm/llms/huggingface/chat/handler.py +++ b/litellm/llms/huggingface/chat/handler.py @@ -24,6 +24,7 @@ import requests import litellm from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from 
litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -36,8 +37,9 @@ from litellm.llms.huggingface.chat.transformation import ( from litellm.secret_managers.main import get_secret_str from litellm.types.completion import ChatCompletionMessageToolCallParam from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import EmbeddingResponse from litellm.types.utils import Logprobs as TextCompletionLogprobs -from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage +from litellm.types.utils import ModelResponse, Usage from ...base import BaseLLM from ..common_utils import HuggingfaceError, hf_task_list, hf_tasks @@ -453,11 +455,11 @@ class Huggingface(BaseLLM): def _process_embedding_response( self, embeddings: dict, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, model: str, input: List, encoding: Any, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: output_data = [] if "similarities" in embeddings: for idx, embedding in embeddings["similarities"]: @@ -583,7 +585,7 @@ class Huggingface(BaseLLM): self, model: str, input: list, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: LiteLLMLoggingObj, encoding: Callable, @@ -593,7 +595,7 @@ class Huggingface(BaseLLM): aembedding: Optional[bool] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, headers={}, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: super().embedding() headers = hf_chat_config.validate_environment( api_key=api_key, diff --git a/litellm/llms/huggingface/chat/transformation.py b/litellm/llms/huggingface/chat/transformation.py index 155fc8dc81..8238d1be41 100644 --- a/litellm/llms/huggingface/chat/transformation.py +++ b/litellm/llms/huggingface/chat/transformation.py @@ -8,10 +8,15 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union import httpx import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + convert_content_list_to_str, +) +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.common_utils import convert_content_list_to_str -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import Choices, Message, ModelResponse, Usage @@ -407,7 +412,7 @@ class HuggingfaceChatConfig(BaseConfig): def convert_to_model_response_object( # noqa: PLR0915 self, completion_response: Union[List[Dict[str, Any]], Dict[str, Any]], - model_response: litellm.ModelResponse, + model_response: ModelResponse, task: Optional[hf_tasks], optional_params: dict, encoding: Any, diff --git a/litellm/llms/ollama/completion/handler.py b/litellm/llms/ollama/completion/handler.py index 5d6e2ef2dc..d50e7d5e64 100644 --- a/litellm/llms/ollama/completion/handler.py +++ b/litellm/llms/ollama/completion/handler.py @@ -14,11 +14,20 @@ import requests # type: ignore import litellm from litellm import verbose_logger +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, 
+ prompt_factory, +) from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.secret_managers.main import get_secret_str -from litellm.types.utils import ModelInfo, ProviderField, StreamingChoices +from litellm.types.utils import ( + EmbeddingResponse, + ModelInfo, + ModelResponse, + ProviderField, + StreamingChoices, +) -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from ..common_utils import OllamaError from .transformation import OllamaConfig @@ -53,7 +62,7 @@ def _convert_image(image): # ollama implementation def get_ollama_response( - model_response: litellm.ModelResponse, + model_response: ModelResponse, model: str, prompt: str, optional_params: dict, @@ -391,7 +400,7 @@ async def ollama_aembeddings( api_base: str, model: str, prompts: List[str], - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: Any, encoding: Any, @@ -479,7 +488,7 @@ def ollama_embeddings( model: str, prompts: list, optional_params: dict, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, logging_obj: Any, encoding=None, ): diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 47555a3a48..a0ccb81730 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -17,7 +17,7 @@ from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction from litellm.types.llms.openai import ChatCompletionAssistantToolCall -from litellm.types.utils import StreamingChoices +from litellm.types.utils import ModelResponse, StreamingChoices class OllamaError(Exception): @@ -198,7 +198,7 @@ class OllamaChatConfig(OpenAIGPTConfig): # ollama implementation def get_ollama_response( # noqa: PLR0915 - model_response: litellm.ModelResponse, + model_response: ModelResponse, messages: list, optional_params: dict, model: str, diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index cc1552ba0b..b2d14a3187 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -28,24 +28,31 @@ import litellm from litellm import LlmProviders from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS from litellm.secret_managers.main import get_secret_str -from litellm.types.utils import ProviderField +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + ModelResponse, + ProviderField, + TextCompletionResponse, + Usage, +) from litellm.utils import ( Choices, CustomStreamWrapper, Message, - ModelResponse, ProviderConfigManager, - TextCompletionResponse, - Usage, convert_to_model_response_object, ) from ...types.llms.openai import * from ..base import BaseLLM -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from .chat.gpt_transformation import OpenAIGPTConfig from .common_utils import OpenAIError, drop_params_from_unprocessable_entity_error @@ -882,7 +889,7 @@ class OpenAIChatCompletion(BaseLLM): self, input: list, data: dict, - model_response: 
litellm.utils.EmbeddingResponse, + model_response: EmbeddingResponse, timeout: float, logging_obj: LiteLLMLoggingObj, api_key: Optional[str] = None, @@ -911,9 +918,7 @@ class OpenAIChatCompletion(BaseLLM): additional_args={"complete_input_dict": data}, original_response=stringified_response, ) - returned_response: ( - litellm.EmbeddingResponse - ) = convert_to_model_response_object( + returned_response: EmbeddingResponse = convert_to_model_response_object( response_object=stringified_response, model_response_object=model_response, response_type="embedding", @@ -953,14 +958,14 @@ class OpenAIChatCompletion(BaseLLM): input: list, timeout: float, logging_obj, - model_response: litellm.utils.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str] = None, api_base: Optional[str] = None, client=None, aembedding=None, max_retries: Optional[int] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: super().embedding() try: model = model @@ -1011,7 +1016,7 @@ class OpenAIChatCompletion(BaseLLM): additional_args={"complete_input_dict": data}, original_response=sync_embedding_response, ) - response: litellm.EmbeddingResponse = convert_to_model_response_object( + response: EmbeddingResponse = convert_to_model_response_object( response_object=sync_embedding_response.model_dump(), model_response_object=model_response, _response_headers=headers, @@ -1068,7 +1073,7 @@ class OpenAIChatCompletion(BaseLLM): except Exception as e: ## LOGGING logging_obj.post_call( - input=input, + input=prompt, api_key=api_key, original_response=str(e), ) @@ -1083,10 +1088,10 @@ class OpenAIChatCompletion(BaseLLM): logging_obj: Any, api_key: Optional[str] = None, api_base: Optional[str] = None, - model_response: Optional[litellm.utils.ImageResponse] = None, + model_response: Optional[ImageResponse] = None, client=None, aimg_generation=None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: data = {} try: model = model diff --git a/litellm/llms/openai_like/chat/transformation.py b/litellm/llms/openai_like/chat/transformation.py index c8511cb630..9d89e5d09f 100644 --- a/litellm/llms/openai_like/chat/transformation.py +++ b/litellm/llms/openai_like/chat/transformation.py @@ -3,7 +3,7 @@ OpenAI-like chat completion transformation """ import types -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union import httpx from pydantic import BaseModel @@ -16,6 +16,13 @@ from litellm.types.utils import ModelResponse from ....utils import _remove_additional_properties, _remove_strict_from_schema from ...openai.chat.gpt_transformation import OpenAIGPTConfig +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + class OpenAILikeChatConfig(OpenAIGPTConfig): def _get_openai_compatible_provider_info( @@ -64,7 +71,7 @@ class OpenAILikeChatConfig(OpenAIGPTConfig): response: httpx.Response, model_response: ModelResponse, stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore + logging_obj: LiteLLMLoggingObj, optional_params: dict, api_key: Optional[str], data: Union[dict, str], diff --git a/litellm/llms/openai_like/embedding/handler.py b/litellm/llms/openai_like/embedding/handler.py index e786b5db89..03e7c0fb2e 100644 --- a/litellm/llms/openai_like/embedding/handler.py +++ b/litellm/llms/openai_like/embedding/handler.py @@ -11,8 +11,7 @@ from enum import 
Enum from functools import partial from typing import Any, Callable, List, Literal, Optional, Tuple, Union -import httpx # type: ignore -import requests # type: ignore +import httpx import litellm from litellm.litellm_core_utils.core_helpers import map_finish_reason @@ -21,7 +20,7 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.utils import EmbeddingResponse +from litellm.types.utils import EmbeddingResponse from ..common_utils import OpenAILikeBase, OpenAILikeError @@ -100,7 +99,7 @@ class OpenAILikeEmbeddingHandler(OpenAILikeBase): api_key: Optional[str], api_base: Optional[str], optional_params: dict, - model_response: Optional[litellm.utils.EmbeddingResponse] = None, + model_response: Optional[EmbeddingResponse] = None, client=None, aembedding=None, custom_endpoint: Optional[bool] = None, diff --git a/litellm/llms/petals/completion/transformation.py b/litellm/llms/petals/completion/transformation.py index 17386a7df5..52b8cd178d 100644 --- a/litellm/llms/petals/completion/transformation.py +++ b/litellm/llms/petals/completion/transformation.py @@ -10,6 +10,7 @@ from litellm.llms.base_llm.transformation import ( LiteLLMLoggingObj, ) from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import ModelResponse from ..common_utils import PetalsError @@ -111,7 +112,7 @@ class PetalsConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, request_data: dict, messages: List[AllMessageValues], @@ -120,7 +121,7 @@ class PetalsConfig(BaseConfig): encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: raise NotImplementedError( "Petals transformation currently done in handler.py. 
[TODO] Move to the transformation.py" ) diff --git a/litellm/llms/predibase/chat/handler.py b/litellm/llms/predibase/chat/handler.py index 4ed2e83f63..7352c2204c 100644 --- a/litellm/llms/predibase/chat/handler.py +++ b/litellm/llms/predibase/chat/handler.py @@ -27,6 +27,7 @@ from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, ) +from litellm.types.utils import LiteLLMLoggingBaseClass from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage from ...base import BaseLLM @@ -92,7 +93,7 @@ class PredibaseChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: LiteLLMLoggingBaseClass, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/sagemaker/completion/transformation.py b/litellm/llms/sagemaker/completion/transformation.py index f7bac46b6f..0a91819b7b 100644 --- a/litellm/llms/sagemaker/completion/transformation.py +++ b/litellm/llms/sagemaker/completion/transformation.py @@ -13,10 +13,13 @@ from httpx._models import Headers, Response import litellm from litellm.litellm_core_utils.asyncify import asyncify +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import Usage +from litellm.types.utils import ModelResponse, Usage from ..common_utils import SagemakerError @@ -197,7 +200,7 @@ class SagemakerConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, request_data: dict, messages: List[AllMessageValues], @@ -206,7 +209,7 @@ class SagemakerConfig(BaseConfig): encoding: str, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: completion_response = raw_response.json() ## LOGGING logging_obj.post_call( diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index d67419767a..e6ac6928bd 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -5,20 +5,20 @@ Why separate file? 
Make it easy to see how transformation works """ import os -from typing import List, Literal, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, cast import httpx from pydantic import BaseModel import litellm from litellm._logging import verbose_logger -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.litellm_core_utils.prompt_templates.factory import ( convert_to_anthropic_image_obj, convert_to_gemini_tool_call_invoke, convert_to_gemini_tool_call_result, response_schema_prompt, ) +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.types.files import ( get_file_mime_type_for_file_type, get_file_type_from_extension, @@ -49,6 +49,13 @@ from ..common_utils import ( get_supports_system_message, ) +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + def _process_gemini_image(image_url: str) -> PartType: """ @@ -348,7 +355,7 @@ def sync_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore + logging_obj: LiteLLMLoggingObj, custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py index 7c288da70b..bb39fcb1ad 100644 --- a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py +++ b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py @@ -10,18 +10,17 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( - VertexLLM, -) +from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM +from litellm.types.utils import ImageResponse class VertexImageGeneration(VertexLLM): def process_image_generation_response( self, json_response: Dict[str, Any], - model_response: litellm.ImageResponse, + model_response: ImageResponse, model: Optional[str] = None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: if "predictions" not in json_response: raise litellm.InternalServerError( message=f"image generation response does not contain 'predictions', got {json_response}", @@ -46,7 +45,7 @@ class VertexImageGeneration(VertexLLM): vertex_project: Optional[str], vertex_location: Optional[str], vertex_credentials: Optional[str], - model_response: litellm.ImageResponse, + model_response: ImageResponse, logging_obj: Any, model: Optional[ str @@ -55,7 +54,7 @@ class VertexImageGeneration(VertexLLM): optional_params: Optional[dict] = None, timeout: Optional[int] = None, aimg_generation=False, - ) -> litellm.ImageResponse: + ) -> ImageResponse: if aimg_generation is True: return self.aimage_generation( # type: ignore prompt=prompt, diff --git a/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py index 81708530a2..eaffbd38ee 100644 --- a/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py +++ b/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py @@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import ( MultimodalPredictions,
VertexMultimodalEmbeddingRequest, ) -from litellm.types.utils import Embedding +from litellm.types.utils import Embedding, EmbeddingResponse from litellm.utils import is_base64_encoded @@ -39,7 +39,7 @@ class VertexMultimodalEmbedding(VertexLLM): model: str, input: Union[list, str], print_verbose, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, custom_llm_provider: Literal["gemini", "vertex_ai"], optional_params: dict, logging_obj: LiteLLMLoggingObj, @@ -52,7 +52,7 @@ class VertexMultimodalEmbedding(VertexLLM): aembedding=False, timeout=300, client=None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: _auth_header, vertex_project = self._ensure_access_token( credentials=vertex_credentials, diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py index d3621f411b..d1634f1108 100644 --- a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py +++ b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py @@ -15,12 +15,10 @@ from litellm.llms.custom_httpx.http_handler import ( _get_httpx_client, get_async_httpx_client, ) -from litellm.llms.vertex_ai.vertex_ai_non_gemini import ( - VertexAIError, -) +from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.types.llms.vertex_ai import * -from litellm.utils import Usage +from litellm.types.utils import EmbeddingResponse, Usage from .transformation import VertexAITextEmbeddingConfig from .types import * @@ -35,7 +33,7 @@ class VertexEmbedding(VertexBase): model: str, input: Union[list, str], print_verbose, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: LiteLLMLoggingObject, custom_llm_provider: Literal[ @@ -52,7 +50,7 @@ class VertexEmbedding(VertexBase): vertex_credentials: Optional[str] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: if aembedding is True: return self.async_embedding( # type: ignore model=model, diff --git a/litellm/llms/vertex_ai/vertex_embeddings/transformation.py b/litellm/llms/vertex_ai/vertex_embeddings/transformation.py index 6f4b25ceff..00f384c32c 100644 --- a/litellm/llms/vertex_ai/vertex_embeddings/transformation.py +++ b/litellm/llms/vertex_ai/vertex_embeddings/transformation.py @@ -4,7 +4,7 @@ from typing import List, Literal, Optional, Union from pydantic import BaseModel import litellm -from litellm.utils import Usage +from litellm.types.utils import EmbeddingResponse, Usage from .types import * @@ -198,8 +198,8 @@ class VertexAITextEmbeddingConfig(BaseModel): return text_embedding_input def transform_vertex_response_to_openai( - self, response: dict, model: str, model_response: litellm.EmbeddingResponse - ) -> litellm.EmbeddingResponse: + self, response: dict, model: str, model_response: EmbeddingResponse + ) -> EmbeddingResponse: """ Transforms a vertex embedding response to an openai response. 
""" @@ -234,8 +234,8 @@ class VertexAITextEmbeddingConfig(BaseModel): return model_response def _transform_vertex_response_to_openai_for_fine_tuned_models( - self, response: dict, model: str, model_response: litellm.EmbeddingResponse - ) -> litellm.EmbeddingResponse: + self, response: dict, model: str, model_response: EmbeddingResponse + ) -> EmbeddingResponse: """ Transforms a vertex fine-tuned model embedding response to an openai response format. """ diff --git a/litellm/llms/watsonx/completion/handler.py b/litellm/llms/watsonx/completion/handler.py index 599338c308..df8600e99d 100644 --- a/litellm/llms/watsonx/completion/handler.py +++ b/litellm/llms/watsonx/completion/handler.py @@ -24,6 +24,8 @@ import httpx # type: ignore import requests # type: ignore import litellm +from litellm.litellm_core_utils.prompt_templates import factory as ptf +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, @@ -34,7 +36,6 @@ from litellm.types.llms.watsonx import WatsonXAIEndpoint from litellm.utils import EmbeddingResponse, ModelResponse, Usage, map_finish_reason from ...base import BaseLLM -from litellm.litellm_core_utils.prompt_templates import factory as ptf from ..common_utils import WatsonXAIError, _get_api_params, generate_iam_token from .transformation import IBMWatsonXAIConfig @@ -204,7 +205,7 @@ class IBMWatsonXAI(BaseLLM): def process_stream_response( stream_resp: Union[Iterator[str], AsyncIterator], - ) -> litellm.CustomStreamWrapper: + ) -> CustomStreamWrapper: streamwrapper = litellm.CustomStreamWrapper( stream_resp, model=model, @@ -235,7 +236,7 @@ class IBMWatsonXAI(BaseLLM): json_resp = resp.json() return self._process_text_gen_response(json_resp, model_response) - def handle_stream_request(request_params: dict) -> litellm.CustomStreamWrapper: + def handle_stream_request(request_params: dict) -> CustomStreamWrapper: # stream the response - generated chunks will be handled # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream with self.request_manager.request( @@ -249,7 +250,7 @@ class IBMWatsonXAI(BaseLLM): async def handle_stream_request_async( request_params: dict, - ) -> litellm.CustomStreamWrapper: + ) -> CustomStreamWrapper: # stream the response - generated chunks will be handled # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream async with self.request_manager.async_request( @@ -321,14 +322,14 @@ class IBMWatsonXAI(BaseLLM): self, model: str, input: Union[list, str], - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, api_key: Optional[str], logging_obj: Any, optional_params: dict, encoding=None, print_verbose=None, aembedding=None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: """ Send a text embedding request to the IBM Watsonx.ai API. 
""" diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index b74e5199e8..45e8c844c4 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -35,6 +35,7 @@ from litellm.proxy._types import ( ) from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.utils import PrismaClient, ProxyLogging, log_db_metrics +from litellm.router import Router from litellm.types.services import ServiceLoggerPayload, ServiceTypes from .auth_checks_organization import organization_role_based_access_check @@ -61,7 +62,7 @@ def common_checks( # noqa: PLR0915 global_proxy_spend: Optional[float], general_settings: dict, route: str, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], ) -> bool: """ Common checks across jwt + key-based auth. @@ -347,7 +348,7 @@ async def get_end_user_object( def model_in_access_group( - model: str, team_models: Optional[List[str]], llm_router: Optional[litellm.Router] + model: str, team_models: Optional[List[str]], llm_router: Optional[Router] ) -> bool: from collections import defaultdict diff --git a/litellm/proxy/example_config_yaml/custom_handler.py b/litellm/proxy/example_config_yaml/custom_handler.py index fdde975d6f..52e6686e6a 100644 --- a/litellm/proxy/example_config_yaml/custom_handler.py +++ b/litellm/proxy/example_config_yaml/custom_handler.py @@ -4,10 +4,11 @@ from typing import Any, Optional import litellm from litellm import CustomLLM, ImageObject, ImageResponse, completion, get_llm_provider from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from litellm.types.utils import ModelResponse class MyCustomLLM(CustomLLM): - def completion(self, *args, **kwargs) -> litellm.ModelResponse: + def completion(self, *args, **kwargs) -> ModelResponse: return litellm.completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index db5972a7cf..7a23817655 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -45,6 +45,7 @@ from litellm.types.guardrails import ( BedrockTextContent, GuardrailEventHooks, ) +from litellm.types.utils import ModelResponse GUARDRAIL_NAME = "bedrock" @@ -70,7 +71,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): def convert_to_bedrock_format( self, messages: Optional[List[Dict[str, str]]] = None, - response: Optional[Union[Any, litellm.ModelResponse]] = None, + response: Optional[Union[Any, ModelResponse]] = None, ) -> BedrockRequest: bedrock_request: BedrockRequest = BedrockRequest(source="INPUT") bedrock_request_content: List[BedrockContentItem] = [] diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py index be4eb79885..19e152c2c5 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm/proxy/hooks/prompt_injection_detection.py @@ -20,8 +20,11 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger -from litellm.litellm_core_utils.prompt_templates.factory import prompt_injection_detection_default_pt +from litellm.litellm_core_utils.prompt_templates.factory import ( + prompt_injection_detection_default_pt, +) from litellm.proxy._types import LiteLLMPromptInjectionParams, 
UserAPIKeyAuth +from litellm.router import Router from litellm.utils import get_formatted_prompt @@ -32,7 +35,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): prompt_injection_params: Optional[LiteLLMPromptInjectionParams] = None, ): self.prompt_injection_params = prompt_injection_params - self.llm_router: Optional[litellm.Router] = None + self.llm_router: Optional[Router] = None self.verbs = [ "Ignore", @@ -74,7 +77,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): if litellm.set_verbose is True: print(print_statement) # noqa - def update_environment(self, router: Optional[litellm.Router] = None): + def update_environment(self, router: Optional[Router] = None): self.llm_router = router if ( diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py index 9161eb8493..4799de9eba 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py @@ -16,6 +16,7 @@ from litellm.llms.anthropic.chat.handler import ( from litellm.llms.anthropic.chat.transformation import AnthropicConfig from litellm.proxy._types import PassThroughEndpointLoggingTypedDict from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload +from litellm.types.utils import ModelResponse, TextCompletionResponse if TYPE_CHECKING: from ..success_handler import PassThroughEndpointLogging @@ -43,9 +44,7 @@ class AnthropicPassthroughLoggingHandler: Transforms Anthropic response to OpenAI response, generates a standard logging object so downstream logging can be handled """ model = response_body.get("model", "") - litellm_model_response: ( - litellm.ModelResponse - ) = AnthropicConfig().transform_response( + litellm_model_response: ModelResponse = AnthropicConfig().transform_response( raw_response=httpx_response, model_response=litellm.ModelResponse(), model=model, @@ -89,9 +88,7 @@ class AnthropicPassthroughLoggingHandler: @staticmethod def _create_anthropic_response_logging_payload( - litellm_model_response: Union[ - litellm.ModelResponse, litellm.TextCompletionResponse - ], + litellm_model_response: Union[ModelResponse, TextCompletionResponse], model: str, kwargs: dict, start_time: datetime, @@ -204,7 +201,7 @@ class AnthropicPassthroughLoggingHandler: all_chunks: List[str], litellm_logging_obj: LiteLLMLoggingObj, model: str, - ) -> Optional[Union[litellm.ModelResponse, litellm.TextCompletionResponse]]: + ) -> Optional[Union[ModelResponse, TextCompletionResponse]]: """ Builds complete response from raw Anthropic chunks diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py index a6c9046b32..ea287b57d5 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py @@ -15,6 +15,12 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( ModelResponseIterator as VertexModelResponseIterator, ) from litellm.proxy._types import PassThroughEndpointLoggingTypedDict +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + ModelResponse, + 
TextCompletionResponse, +) if TYPE_CHECKING: from ..success_handler import PassThroughEndpointLogging @@ -40,7 +46,7 @@ class VertexPassthroughLoggingHandler: model = VertexPassthroughLoggingHandler.extract_model_from_url(url_route) instance_of_vertex_llm = litellm.VertexGeminiConfig() - litellm_model_response: litellm.ModelResponse = ( + litellm_model_response: ModelResponse = ( instance_of_vertex_llm.transform_response( model=model, messages=[ @@ -82,8 +88,8 @@ class VertexPassthroughLoggingHandler: _json_response = httpx_response.json() litellm_prediction_response: Union[ - litellm.ModelResponse, litellm.EmbeddingResponse, litellm.ImageResponse - ] = litellm.ModelResponse() + ModelResponse, EmbeddingResponse, ImageResponse + ] = ModelResponse() if vertex_image_generation_class.is_image_generation_response( _json_response ): @@ -176,7 +182,7 @@ class VertexPassthroughLoggingHandler: all_chunks: List[str], litellm_logging_obj: LiteLLMLoggingObj, model: str, - ) -> Optional[Union[litellm.ModelResponse, litellm.TextCompletionResponse]]: + ) -> Optional[Union[ModelResponse, TextCompletionResponse]]: vertex_iterator = VertexModelResponseIterator( streaming_response=None, sync_stream=False, @@ -212,9 +218,7 @@ class VertexPassthroughLoggingHandler: @staticmethod def _create_vertex_response_logging_payload_for_generate_content( - litellm_model_response: Union[ - litellm.ModelResponse, litellm.TextCompletionResponse - ], + litellm_model_response: Union[ModelResponse, TextCompletionResponse], model: str, kwargs: dict, start_time: datetime, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index bee43e4d43..4210d6035c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -109,6 +109,7 @@ from litellm import ( CreateBatchRequest, ListBatchRequest, RetrieveBatchRequest, + Router, ) from litellm._logging import verbose_proxy_logger, verbose_router_logger from litellm.caching.caching import DualCache, RedisCache @@ -482,7 +483,7 @@ user_config_file_path: Optional[str] = None local_logging = True # writes logs to a local api_log.json file for debugging experimental = False #### GLOBAL VARIABLES #### -llm_router: Optional[litellm.Router] = None +llm_router: Optional[Router] = None llm_model_list: Optional[list] = None general_settings: dict = {} callback_settings: dict = {} @@ -2833,7 +2834,7 @@ class ProxyStartupEvent: @classmethod def _initialize_startup_logging( cls, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], proxy_logging_obj: ProxyLogging, redis_usage_cache: Optional[RedisCache], ): diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index d7b275cf0c..9ae26aec09 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -289,7 +289,7 @@ class ProxyLogging: def startup_event( self, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], redis_usage_cache: Optional[RedisCache], ): """Initialize logging and alerting on proxy startup""" @@ -359,7 +359,7 @@ class ProxyLogging: if redis_cache is not None: self.internal_usage_cache.dual_cache.redis_cache = redis_cache - def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None): + def _init_litellm_callbacks(self, llm_router: Optional[Router] = None): litellm.callbacks.append(self.max_parallel_request_limiter) # type: ignore litellm.callbacks.append(self.max_budget_limiter) # type: ignore litellm.callbacks.append(self.cache_control_check) # type: ignore diff --git a/litellm/router.py b/litellm/router.py index 
f9860a6799..0fd39caa05 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -145,6 +145,7 @@ from litellm.types.utils import ModelInfo as ModelMapInfo from litellm.types.utils import StandardLoggingPayload from litellm.utils import ( CustomStreamWrapper, + EmbeddingResponse, ModelResponse, _is_region_eu, calculate_max_parallel_requests, @@ -2071,7 +2072,7 @@ class Router: input: Union[str, List], is_async: Optional[bool] = False, **kwargs, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: try: kwargs["model"] = model kwargs["input"] = input @@ -2146,7 +2147,7 @@ class Router: input: Union[str, List], is_async: Optional[bool] = True, **kwargs, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: try: kwargs["model"] = model kwargs["input"] = input diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 0f925816d5..46017a7963 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1660,3 +1660,84 @@ class PersonalUIKeyGenerationConfig(KeyGenerationConfig): class StandardKeyGenerationConfig(TypedDict, total=False): team_key_generation: TeamUIKeyGenerationConfig personal_key_generation: PersonalUIKeyGenerationConfig + + +class LlmProviders(str, Enum): + OPENAI = "openai" + OPENAI_LIKE = "openai_like" # embedding only + JINA_AI = "jina_ai" + XAI = "xai" + CUSTOM_OPENAI = "custom_openai" + TEXT_COMPLETION_OPENAI = "text-completion-openai" + COHERE = "cohere" + COHERE_CHAT = "cohere_chat" + CLARIFAI = "clarifai" + ANTHROPIC = "anthropic" + ANTHROPIC_TEXT = "anthropic_text" + REPLICATE = "replicate" + HUGGINGFACE = "huggingface" + TOGETHER_AI = "together_ai" + OPENROUTER = "openrouter" + VERTEX_AI = "vertex_ai" + VERTEX_AI_BETA = "vertex_ai_beta" + GEMINI = "gemini" + AI21 = "ai21" + BASETEN = "baseten" + AZURE = "azure" + AZURE_TEXT = "azure_text" + AZURE_AI = "azure_ai" + SAGEMAKER = "sagemaker" + SAGEMAKER_CHAT = "sagemaker_chat" + BEDROCK = "bedrock" + VLLM = "vllm" + NLP_CLOUD = "nlp_cloud" + PETALS = "petals" + OOBABOOGA = "oobabooga" + OLLAMA = "ollama" + OLLAMA_CHAT = "ollama_chat" + DEEPINFRA = "deepinfra" + PERPLEXITY = "perplexity" + MISTRAL = "mistral" + GROQ = "groq" + NVIDIA_NIM = "nvidia_nim" + CEREBRAS = "cerebras" + AI21_CHAT = "ai21_chat" + VOLCENGINE = "volcengine" + CODESTRAL = "codestral" + TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" + DEEPSEEK = "deepseek" + SAMBANOVA = "sambanova" + MARITALK = "maritalk" + VOYAGE = "voyage" + CLOUDFLARE = "cloudflare" + XINFERENCE = "xinference" + FIREWORKS_AI = "fireworks_ai" + FRIENDLIAI = "friendliai" + WATSONX = "watsonx" + WATSONX_TEXT = "watsonx_text" + TRITON = "triton" + PREDIBASE = "predibase" + DATABRICKS = "databricks" + EMPOWER = "empower" + GITHUB = "github" + CUSTOM = "custom" + LITELLM_PROXY = "litellm_proxy" + HOSTED_VLLM = "hosted_vllm" + LM_STUDIO = "lm_studio" + GALADRIEL = "galadriel" + + +class LiteLLMLoggingBaseClass: + """ + Base class for logging pre and post call + + Meant to simplify type checking for logging obj. 
+ """ + + def pre_call(self, input, api_key, model=None, additional_args={}): + pass + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + pass diff --git a/litellm/utils.py b/litellm/utils.py index ddaf60d5a1..6f662f6595 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -126,6 +126,7 @@ from litellm.types.utils import ( EmbeddingResponse, Function, ImageResponse, + LlmProviders, Message, ModelInfo, ModelResponse, @@ -147,6 +148,7 @@ claude_json_str = json.dumps(json_data) import importlib.metadata from concurrent.futures import ThreadPoolExecutor from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -163,6 +165,8 @@ from typing import ( from openai import OpenAIError as OriginalError +from litellm.llms.base_llm.transformation import BaseConfig + from ._logging import verbose_logger from .caching.caching import ( Cache, @@ -235,7 +239,6 @@ last_fetched_at = None last_fetched_at_keys = None ######## Model Response ######################### - # All liteLLM Model responses will be in this format, Follows the OpenAI Format # https://docs.litellm.ai/docs/completion/output # { @@ -6205,13 +6208,10 @@ def validate_chat_completion_user_messages(messages: List[AllMessageValues]): return messages -from litellm.llms.base_llm.transformation import BaseConfig - - class ProviderConfigManager: @staticmethod def get_provider_chat_config( # noqa: PLR0915 - model: str, provider: litellm.LlmProviders + model: str, provider: LlmProviders ) -> BaseConfig: """ Returns the provider config for a given provider. diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py index 407010dcc6..edab7c1d34 100644 --- a/tests/documentation_tests/test_api_docs.py +++ b/tests/documentation_tests/test_api_docs.py @@ -173,8 +173,8 @@ def main(): "list_organization", "user_update", ] - directory = "../../litellm/proxy/management_endpoints" # LOCAL - # directory = "./litellm/proxy/management_endpoints" + # directory = "../../litellm/proxy/management_endpoints" # LOCAL + directory = "./litellm/proxy/management_endpoints" # Convert function names to set for faster lookup target_functions = set(function_names) diff --git a/tests/documentation_tests/test_circular_imports.py b/tests/documentation_tests/test_circular_imports.py new file mode 100644 index 0000000000..b7ea13cd12 --- /dev/null +++ b/tests/documentation_tests/test_circular_imports.py @@ -0,0 +1,162 @@ +import os +import ast +import sys +from typing import List, Tuple, Optional + + +def find_litellm_type_hints(directory: str) -> List[Tuple[str, int, str]]: + """ + Recursively search for Python files in the given directory + and find type hints containing 'litellm.'. + + Args: + directory (str): The root directory to search for Python files + + Returns: + List of tuples containing (file_path, line_number, type_hint) + """ + litellm_type_hints = [] + + def is_litellm_type_hint(node): + """ + Recursively check if a type annotation contains 'litellm.' + + Handles more complex type hints like: + - Optional[litellm.Type] + - Union[litellm.Type1, litellm.Type2] + - Nested type hints + """ + try: + # Convert node to string representation + type_str = ast.unparse(node) + + # Direct check for litellm in type string + if "litellm." 
in type_str: + return True + + # Handle more complex type hints + if isinstance(node, ast.Subscript): + # Check Union or Optional types + if isinstance(node.value, ast.Name) and node.value.id in [ + "Union", + "Optional", + ]: + # Check each element in the Union/Optional type + if isinstance(node.slice, ast.Tuple): + return any(is_litellm_type_hint(elt) for elt in node.slice.elts) + else: + return is_litellm_type_hint(node.slice) + + # Recursive check for subscripted types + return is_litellm_type_hint(node.value) or is_litellm_type_hint( + node.slice + ) + + # Recursive check for attribute types + if isinstance(node, ast.Attribute): + return "litellm." in ast.unparse(node) + + # Recursive check for name types + if isinstance(node, ast.Name): + return "litellm" in node.id + + return False + except Exception: + # Fallback to string checking if parsing fails + try: + return "litellm." in ast.unparse(node) + except: + return False + + def scan_file(file_path: str): + """ + Scan a single Python file for LiteLLM type hints + """ + try: + # Use utf-8-sig to handle files with BOM, ignore errors + with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as file: + tree = ast.parse(file.read()) + + for node in ast.walk(tree): + # Check type annotations in variable annotations + if isinstance(node, ast.AnnAssign) and node.annotation: + if is_litellm_type_hint(node.annotation): + litellm_type_hints.append( + (file_path, node.lineno, ast.unparse(node.annotation)) + ) + + # Check type hints in function arguments + elif isinstance(node, ast.FunctionDef): + for arg in node.args.args: + if arg.annotation and is_litellm_type_hint(arg.annotation): + litellm_type_hints.append( + (file_path, arg.lineno, ast.unparse(arg.annotation)) + ) + + # Check return type annotation + if node.returns and is_litellm_type_hint(node.returns): + litellm_type_hints.append( + (file_path, node.lineno, ast.unparse(node.returns)) + ) + except SyntaxError as e: + print(f"Syntax error in {file_path}: {e}", file=sys.stderr) + except Exception as e: + print(f"Error processing {file_path}: {e}", file=sys.stderr) + + # Recursively walk through directory + for root, dirs, files in os.walk(directory): + # Remove virtual environment and cache directories from search + dirs[:] = [ + d + for d in dirs + if not any( + venv in d + for venv in [ + "venv", + "env", + "myenv", + ".venv", + "__pycache__", + ".pytest_cache", + ] + ) + ] + + for file in files: + if file.endswith(".py"): + full_path = os.path.join(root, file) + # Skip files in virtual environment or cache directories + if not any( + venv in full_path + for venv in [ + "venv", + "env", + "myenv", + ".venv", + "__pycache__", + ".pytest_cache", + ] + ): + scan_file(full_path) + + return litellm_type_hints + + +def main(): + # Get directory from command line argument or use current directory + directory = "./litellm/" + + # Find LiteLLM type hints + results = find_litellm_type_hints(directory) + + # Print results + if results: + print("LiteLLM Type Hints Found:") + for file_path, line_num, type_hint in results: + print(f"{file_path}:{line_num} - {type_hint}") + else: + print("No LiteLLM type hints found.") + + +if __name__ == "__main__": + main()
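
Taken together, the hunks above replace `litellm.`-qualified annotations (`litellm.ModelResponse`, `litellm.EmbeddingResponse`, `litellm.Router`, `litellm.LlmProviders`) with names imported directly from `litellm.types.utils` and `litellm.router`, which is the pattern the new circular-import check enforces. A minimal sketch of the target style, assuming the litellm package is importable; the function names `log_response` and `attach_router` are illustrative only, not part of this patch:

from typing import TYPE_CHECKING, Optional, Union

from litellm.types.utils import EmbeddingResponse, ModelResponse

if TYPE_CHECKING:
    # Import heavyweight classes only for type checkers when a plain
    # runtime import would reintroduce a circular dependency.
    from litellm.router import Router


def log_response(
    response: Union[ModelResponse, EmbeddingResponse],
) -> Optional[ModelResponse]:
    # Annotations reference the imported classes, never litellm.ModelResponse.
    return response if isinstance(response, ModelResponse) else None


def attach_router(router: Optional["Router"] = None) -> None:
    # The string forward reference keeps this valid when Router is only
    # imported under TYPE_CHECKING.
    ...

The proxy modules touched above mostly take the simpler route of a plain `from litellm.router import Router`; the TYPE_CHECKING guard is shown here only as the fallback for cases where even that direct import would be circular.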
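
For a sense of how `tests/documentation_tests/test_circular_imports.py` spots offenders, here is a self-contained sketch of the same `ast`-based idea; the embedded `completion_handler` source is hypothetical and exists only to trigger the check:

import ast

# Hypothetical module body with a package-qualified return annotation,
# the style the new test is meant to flag.
SOURCE = """
import litellm

def completion_handler(prompt: str) -> litellm.ModelResponse:
    ...
"""

tree = ast.parse(SOURCE)
for node in ast.walk(tree):
    if isinstance(node, ast.FunctionDef) and node.returns is not None:
        annotation = ast.unparse(node.returns)  # "litellm.ModelResponse"
        if "litellm." in annotation:
            print(f"{node.name}:{node.lineno} -> {annotation}")

The real test additionally inspects `ast.AnnAssign` nodes and function-argument annotations, and skips virtual-env and cache directories, but its detection core is the same `"litellm." in ast.unparse(...)` substring check shown here.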