diff --git a/litellm/__init__.py b/litellm/__init__.py
index d3d3dd0d4b..60b8cf81a0 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -53,6 +53,7 @@ from litellm.constants import (
     cohere_embedding_models,
     bedrock_embedding_models,
     known_tokenizer_config,
+    BEDROCK_INVOKE_PROVIDERS_LITERAL,
 )
 from litellm.types.guardrails import GuardrailItem
 from litellm.proxy._types import (
@@ -361,17 +362,7 @@ BEDROCK_CONVERSE_MODELS = [
     "meta.llama3-2-11b-instruct-v1:0",
     "meta.llama3-2-90b-instruct-v1:0",
 ]
-BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
-    "cohere",
-    "anthropic",
-    "mistral",
-    "amazon",
-    "meta",
-    "llama",
-    "ai21",
-    "nova",
-    "deepseek_r1",
-]
+
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
 open_ai_text_completion_models: List = []
diff --git a/litellm/constants.py b/litellm/constants.py
index 06756b8f20..0288c45e40 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Literal
 
 ROUTER_MAX_FALLBACKS = 5
 DEFAULT_BATCH_SIZE = 512
@@ -320,6 +320,17 @@ baseten_models: List = [
     "31dxrj3",
 ]  # FALCON 7B # WizardLM # Mosaic ML
 
+BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
+    "cohere",
+    "anthropic",
+    "mistral",
+    "amazon",
+    "meta",
+    "llama",
+    "ai21",
+    "nova",
+    "deepseek_r1",
+]
 open_ai_embedding_models: List = ["text-embedding-ada-002"]
 
 cohere_embedding_models: List = [
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index 9f9c810233..e4f87aa5b4 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -308,7 +308,6 @@ class AnthropicConfig(BaseConfig):
         model: str,
         drop_params: bool,
     ) -> dict:
-
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
@@ -342,6 +341,10 @@ class AnthropicConfig(BaseConfig):
                 optional_params["top_p"] = value
             if param == "response_format" and isinstance(value, dict):
 
+                ignore_response_format_types = ["text"]
+                if value["type"] in ignore_response_format_types:  # value is a no-op
+                    continue
+
                 json_schema: Optional[dict] = None
                 if "response_schema" in value:
                     json_schema = value["response_schema"]
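Note: a minimal sketch (not part of the diff) of what the new guard in AnthropicConfig.map_openai_params means in practice, assuming AnthropicConfig can be instantiated with its defaults and using an illustrative model name:

    from litellm.llms.anthropic.chat.transformation import AnthropicConfig

    # response_format={"type": "text"} is now skipped as a no-op instead of being
    # mapped into a JSON-schema tool call, so no extra keys are added for it.
    optional_params = AnthropicConfig().map_openai_params(
        non_default_params={"response_format": {"type": "text"}},
        optional_params={},
        model="claude-3-5-sonnet-20240620",  # illustrative model name
        drop_params=False,
    )
    assert "tools" not in optional_params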
diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py
index 020223f98e..8c9c5acda3 100644
--- a/litellm/llms/base_llm/chat/transformation.py
+++ b/litellm/llms/base_llm/chat/transformation.py
@@ -317,6 +317,7 @@ class BaseConfig(ABC):
         data: dict,
         messages: list,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: Optional[bool] = None,
     ) -> CustomStreamWrapper:
         raise NotImplementedError
 
@@ -330,6 +331,7 @@ class BaseConfig(ABC):
         data: dict,
         messages: list,
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        json_mode: Optional[bool] = None,
     ) -> CustomStreamWrapper:
         raise NotImplementedError
 
diff --git a/litellm/llms/bedrock/base_aws_llm.py b/litellm/llms/bedrock/base_aws_llm.py
index 8158ceab8f..bf9a070f26 100644
--- a/litellm/llms/bedrock/base_aws_llm.py
+++ b/litellm/llms/bedrock/base_aws_llm.py
@@ -2,13 +2,14 @@ import hashlib
 import json
 import os
 from datetime import datetime
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast, get_args
 
 import httpx
 from pydantic import BaseModel
 
 from litellm._logging import verbose_logger
 from litellm.caching.caching import DualCache
+from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL
 from litellm.litellm_core_utils.dd_tracing import tracer
 from litellm.secret_managers.main import get_secret
 
@@ -223,6 +224,60 @@ class BaseAWSLLM:
             # Catch any unexpected errors and return None
             return None
 
+    @staticmethod
+    def _get_provider_from_model_path(
+        model_path: str,
+    ) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
+        """
+        Helper function to get the provider from a model path with format: provider/model-name
+
+        Args:
+            model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
+
+        Returns:
+            Optional[str]: The provider name, or None if no valid provider found
+        """
+        parts = model_path.split("/")
+        if len(parts) >= 1:
+            provider = parts[0]
+            if provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
+                return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
+        return None
+
+    @staticmethod
+    def get_bedrock_invoke_provider(
+        model: str,
+    ) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
+        """
+        Helper function to get the bedrock provider from the model
+
+        handles 4 scenarios:
+        1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
+        2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
+        3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
+        4. model=us.amazon.nova-pro-v1:0 -> Returns `nova`
+        """
+        if model.startswith("invoke/"):
+            model = model.replace("invoke/", "", 1)
+
+        _split_model = model.split(".")[0]
+        if _split_model in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
+            return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
+
+        # If not a known provider, check for pattern with two slashes
+        provider = BaseAWSLLM._get_provider_from_model_path(model)
+        if provider is not None:
+            return provider
+
+        # check if provider == "nova"
+        if "nova" in model:
+            return "nova"
+        else:
+            for provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
+                if provider in model:
+                    return provider
+        return None
+
     def _get_aws_region_name(
         self, optional_params: dict, model: Optional[str] = None
     ) -> str:
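Note: a rough illustration (not from the PR) of how the relocated helper resolves providers, based on the docstring scenarios above:

    from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM

    # prefix before the first "." wins when it is a known provider
    assert BaseAWSLLM.get_bedrock_invoke_provider(
        "invoke/anthropic.claude-3-5-sonnet-20240620-v1:0"
    ) == "anthropic"
    # imported-model ARNs are resolved from the "provider/..." path prefix
    assert BaseAWSLLM.get_bedrock_invoke_provider(
        "llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n"
    ) == "llama"
    # nova models are matched by substring
    assert BaseAWSLLM.get_bedrock_invoke_provider("us.amazon.nova-pro-v1:0") == "nova"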
diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index b86fb7f0f3..3837369a8e 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -206,7 +206,12 @@ class AmazonConverseConfig(BaseConfig):
         messages: Optional[List[AllMessageValues]] = None,
     ) -> dict:
         for param, value in non_default_params.items():
-            if param == "response_format":
+            if param == "response_format" and isinstance(value, dict):
+
+                ignore_response_format_types = ["text"]
+                if value["type"] in ignore_response_format_types:  # value is a no-op
+                    continue
+
                 json_schema: Optional[dict] = None
                 schema_name: str = ""
                 if "response_schema" in value:
diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py
index 32cd137d93..56cf891e76 100644
--- a/litellm/llms/bedrock/chat/invoke_handler.py
+++ b/litellm/llms/bedrock/chat/invoke_handler.py
@@ -226,6 +226,7 @@ async def make_call(
         decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
             model=model,
             sync_stream=False,
+            json_mode=json_mode,
         )
         completion_stream = decoder.aiter_bytes(
             response.aiter_bytes(chunk_size=1024)
@@ -311,6 +312,7 @@ def make_sync_call(
         decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
             model=model,
             sync_stream=True,
+            json_mode=json_mode,
         )
         completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
     elif bedrock_invoke_provider == "deepseek_r1":
@@ -1149,27 +1151,6 @@ class BedrockLLM(BaseAWSLLM):
         )
         return streaming_response
 
-    @staticmethod
-    def get_bedrock_invoke_provider(
-        model: str,
-    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
-        """
-        Helper function to get the bedrock provider from the model
-
-        handles 2 scenarions:
-        1. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
-        2. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
-        """
-        _split_model = model.split(".")[0]
-        if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
-            return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
-
-        # If not a known provider, check for pattern with two slashes
-        provider = BedrockLLM._get_provider_from_model_path(model)
-        if provider is not None:
-            return provider
-        return None
-
     @staticmethod
     def _get_provider_from_model_path(
         model_path: str,
@@ -1524,6 +1505,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
         self,
         model: str,
         sync_stream: bool,
+        json_mode: Optional[bool] = None,
     ) -> None:
         """
         Child class of AWSEventStreamDecoder that handles the streaming response from the Anthropic family of models
@@ -1534,6 +1516,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
         self.anthropic_model_response_iterator = AnthropicModelResponseIterator(
             streaming_response=None,
             sync_stream=sync_stream,
+            json_mode=json_mode,
         )
 
     def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
diff --git a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py
index 085cf0b9ca..d0d06ef2b2 100644
--- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py
@@ -3,8 +3,10 @@ from typing import Optional
 
 import litellm
 
+from .base_invoke_transformation import AmazonInvokeConfig
 
-class AmazonAnthropicConfig:
+
+class AmazonAnthropicConfig(AmazonInvokeConfig):
     """
     Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
 
@@ -57,9 +59,7 @@
             and v is not None
         }
 
-    def get_supported_openai_params(
-        self,
-    ):
+    def get_supported_openai_params(self, model: str):
         return [
             "max_tokens",
             "max_completion_tokens",
@@ -69,7 +69,13 @@
             "stream",
         ]
 
-    def map_openai_params(self, non_default_params: dict, optional_params: dict):
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ):
         for param, value in non_default_params.items():
             if param == "max_tokens" or param == "max_completion_tokens":
                 optional_params["max_tokens_to_sample"] = value
diff --git a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py
index 09842aef01..0cac339a3c 100644
--- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py
@@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, List, Optional
 
 import httpx
 
-import litellm
+from litellm.llms.anthropic.chat.transformation import AnthropicConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
     AmazonInvokeConfig,
 )
@@ -17,7 +17,7 @@ else:
     LiteLLMLoggingObj = Any
 
 
-class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
+class AmazonAnthropicClaude3Config(AmazonInvokeConfig, AnthropicConfig):
     """
     Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
 
@@ -28,18 +28,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
 
     anthropic_version: str = "bedrock-2023-05-31"
 
-    def get_supported_openai_params(self, model: str):
-        return [
-            "max_tokens",
-            "max_completion_tokens",
-            "tools",
-            "tool_choice",
-            "stream",
-            "stop",
-            "temperature",
-            "top_p",
-            "extra_headers",
-        ]
+    def get_supported_openai_params(self, model: str) -> List[str]:
+        return AnthropicConfig.get_supported_openai_params(self, model)
 
     def map_openai_params(
         self,
@@ -47,21 +37,14 @@
         optional_params: dict,
         model: str,
         drop_params: bool,
-    ):
-        for param, value in non_default_params.items():
-            if param == "max_tokens" or param == "max_completion_tokens":
-                optional_params["max_tokens"] = value
-            if param == "tools":
-                optional_params["tools"] = value
-            if param == "stream":
-                optional_params["stream"] = value
-            if param == "stop":
-                optional_params["stop_sequences"] = value
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-        return optional_params
+    ) -> dict:
+        return AnthropicConfig.map_openai_params(
+            self,
+            non_default_params,
+            optional_params,
+            model,
+            drop_params,
+        )
 
     def transform_request(
         self,
@@ -71,7 +54,8 @@
         litellm_params: dict,
         headers: dict,
     ) -> dict:
-        _anthropic_request = litellm.AnthropicConfig().transform_request(
+        _anthropic_request = AnthropicConfig.transform_request(
+            self,
             model=model,
             messages=messages,
             optional_params=optional_params,
@@ -80,6 +64,7 @@
         )
 
         _anthropic_request.pop("model", None)
+        _anthropic_request.pop("stream", None)
         if "anthropic_version" not in _anthropic_request:
             _anthropic_request["anthropic_version"] = self.anthropic_version
 
@@ -99,7 +84,8 @@
         api_key: Optional[str] = None,
         json_mode: Optional[bool] = None,
     ) -> ModelResponse:
-        return litellm.AnthropicConfig().transform_response(
+        return AnthropicConfig.transform_response(
+            self,
             model=model,
             raw_response=raw_response,
             model_response=model_response,
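Note: with the class now inheriting from both AmazonInvokeConfig and AnthropicConfig, parameter support is delegated rather than hand-maintained. A hedged sketch of the expected effect, assuming the config can be constructed with no arguments (the exact returned list comes from AnthropicConfig and may differ by model):

    from litellm.llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
        AmazonAnthropicClaude3Config,
    )

    config = AmazonAnthropicClaude3Config()
    # Whatever AnthropicConfig reports for Claude is now reported here too,
    # e.g. "response_format" if the upstream config lists it.
    print(config.get_supported_openai_params(model="anthropic.claude-3-5-sonnet-20240620-v1:0"))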
diff --git a/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py
index e98cb4fa94..e0da783897 100644
--- a/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py
@@ -3,7 +3,7 @@ import json
 import time
 import urllib.parse
 from functools import partial
-from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, cast, get_args
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
 
 import httpx
 
@@ -461,6 +461,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
         data: dict,
         messages: list,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: Optional[bool] = None,
     ) -> CustomStreamWrapper:
         streaming_response = CustomStreamWrapper(
             completion_stream=None,
@@ -475,6 +476,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
                 logging_obj=logging_obj,
                 fake_stream=True if "ai21" in api_base else False,
                 bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
+                json_mode=json_mode,
             ),
             model=model,
             custom_llm_provider="bedrock",
@@ -493,6 +495,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
         data: dict,
         messages: list,
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        json_mode: Optional[bool] = None,
     ) -> CustomStreamWrapper:
         if client is None or isinstance(client, AsyncHTTPHandler):
             client = _get_httpx_client(params={})
@@ -509,6 +512,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
                 logging_obj=logging_obj,
                 fake_stream=True if "ai21" in api_base else False,
                 bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
+                json_mode=json_mode,
             ),
             model=model,
             custom_llm_provider="bedrock",
@@ -527,56 +531,6 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
         """
         return False
 
-    @staticmethod
-    def get_bedrock_invoke_provider(
-        model: str,
-    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
-        """
-        Helper function to get the bedrock provider from the model
-
-        handles 3 scenarions:
-        1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
-        2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
-        3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
-        4. model=us.amazon.nova-pro-v1:0 -> Returns `nova`
-        """
-        if model.startswith("invoke/"):
-            model = model.replace("invoke/", "", 1)
-
-        _split_model = model.split(".")[0]
-        if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
-            return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
-
-        # If not a known provider, check for pattern with two slashes
-        provider = AmazonInvokeConfig._get_provider_from_model_path(model)
-        if provider is not None:
-            return provider
-
-        # check if provider == "nova"
-        if "nova" in model:
-            return "nova"
-        return None
-
-    @staticmethod
-    def _get_provider_from_model_path(
-        model_path: str,
-    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
-        """
-        Helper function to get the provider from a model path with format: provider/model-name
-
-        Args:
-            model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
-
-        Returns:
-            Optional[str]: The provider name, or None if no valid provider found
-        """
-        parts = model_path.split("/")
-        if len(parts) >= 1:
-            provider = parts[0]
-            if provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
-                return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
-        return None
-
     def get_bedrock_model_id(
         self,
         optional_params: dict,
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
index ebe5308c1c..991e4aeaec 100644
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -159,6 +159,7 @@ class BaseLLMHTTPHandler:
         encoding: Any,
         api_key: Optional[str] = None,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: bool = False,
     ):
         if client is None:
             async_httpx_client = get_async_httpx_client(
@@ -190,6 +191,7 @@ class BaseLLMHTTPHandler:
             optional_params=optional_params,
             litellm_params=litellm_params,
             encoding=encoding,
+            json_mode=json_mode,
         )
 
     def completion(
@@ -211,6 +213,7 @@ class BaseLLMHTTPHandler:
         headers: Optional[dict] = {},
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
     ):
+        json_mode: bool = optional_params.pop("json_mode", False)
         provider_config = ProviderConfigManager.get_provider_chat_config(
             model=model, provider=litellm.LlmProviders(custom_llm_provider)
         )
@@ -286,6 +289,7 @@ class BaseLLMHTTPHandler:
                     else None
                 ),
                 litellm_params=litellm_params,
+                json_mode=json_mode,
             )
 
         else:
@@ -309,6 +313,7 @@ class BaseLLMHTTPHandler:
                     if client is not None and isinstance(client, AsyncHTTPHandler)
                     else None
                 ),
+                json_mode=json_mode,
             )
 
             if stream is True:
@@ -327,6 +332,7 @@ class BaseLLMHTTPHandler:
                     data=data,
                     messages=messages,
                     client=client,
+                    json_mode=json_mode,
                 )
             completion_stream, headers = self.make_sync_call(
                 provider_config=provider_config,
@@ -380,6 +386,7 @@ class BaseLLMHTTPHandler:
             optional_params=optional_params,
             litellm_params=litellm_params,
             encoding=encoding,
+            json_mode=json_mode,
         )
 
     def make_sync_call(
@@ -453,6 +460,7 @@ class BaseLLMHTTPHandler:
         litellm_params: dict,
         fake_stream: bool = False,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: Optional[bool] = None,
     ):
         if provider_config.has_custom_stream_wrapper is True:
             return provider_config.get_async_custom_stream_wrapper(
@@ -464,6 +472,7 @@ class BaseLLMHTTPHandler:
                 data=data,
                 messages=messages,
                 client=client,
+                json_mode=json_mode,
             )
 
         completion_stream, _response_headers = await self.make_async_call_stream_helper(
@@ -720,7 +729,7 @@ class BaseLLMHTTPHandler:
         api_base: Optional[str] = None,
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
     ) -> RerankResponse:
-
+        # get config from model, custom llm provider
         headers = provider_config.validate_environment(
             api_key=api_key,
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 96d44e7a26..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-
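Note: a hypothetical end-to-end check of the json_mode plumbing added above (model id, region, and credentials are placeholders; requires Bedrock access):

    import litellm

    # json_mode is popped in BaseLLMHTTPHandler.completion and threaded through the
    # invoke stream wrappers into AmazonAnthropicClaudeStreamDecoder, so a streamed
    # JSON-mode request over the invoke route is expected to surface the JSON output
    # in the chunks' message content.
    response = litellm.completion(
        model="bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0",
        messages=[{"role": "user", "content": "Return a JSON object with one key, 'ok'."}],
        response_format={"type": "json_object"},
        stream=True,
    )
    for chunk in response:
        print(chunk)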