(Refactor) - migrate bedrock invoke to BaseLLMHTTPHandler class (#8290)

* initial transform for invoke * invoke transform_response * working - able to make request * working get_complete_url * working - invoke now runs on llm_http_handler * fix unused imports * track litellm overhead ms * working stream request * sign_request transform * sign_request update * use has_async_custom_stream_wrapper property * use get_async_custom_stream_wrapper in base llm http handler * fix make_call in invoke handler * fix invoke with streaming get_async_custom_stream_wrapper * working bedrock async streaming with invoke * fix make call handler for bedrock * test_all_model_configs * fix test_bedrock_custom_prompt_template * sync streaming for bedrock invoke * fix _add_stream_param_to_request_body * test_async_text_completion_bedrock * fix transform_request * fix get_supported_openai_params * fix test supports tool choice * fix test_supports_tool_choice * add unit test coverage for bedrock invoke transform * fix location of transformation files * update import loc * fix bedrock invoke unit tests * fix import for max completion tokens
2025-04-27 11:43:54 +00:00 · 2025-02-05 18:58:55 -08:00 · 2025-02-05 18:58:55 -08:00 · 8e0736d5ad
commit 8e0736d5ad
parent 3f206cc2b4
22 changed files with 1870 additions and 737 deletions
--- a/litellm/init.py
+++ b/litellm/init.py
@ -360,7 +360,7 @@ BEDROCK_CONVERSE_MODELS = [
    "meta.llama3-2-90b-instruct-v1:0",
 ]
 BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
-    "cohere", "anthropic", "mistral", "amazon", "meta", "llama"
+    "cohere", "anthropic", "mistral", "amazon", "meta", "llama", "ai21"
 ]
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
@ -853,15 +853,33 @@ from .llms.bedrock.chat.invoke_handler import (
 )
 from .llms.bedrock.common_utils import (
    AmazonTitanConfig,
    AmazonAI21Config,
    AmazonAnthropicConfig,
    AmazonAnthropicClaude3Config,
    AmazonCohereConfig,
    AmazonLlamaConfig,
    AmazonMistralConfig,
    AmazonBedrockGlobalConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation import (
    AmazonAI21Config,
 )
 from .llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation import (
    AmazonAnthropicConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation import (
    AmazonAnthropicClaude3Config,
 )
 from .llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation import (
    AmazonCohereConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.amazon_llama_transformation import (
    AmazonLlamaConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation import (
    AmazonMistralConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.amazon_titan_transformation import (
    AmazonTitanConfig,
 )
 from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 from .llms.bedrock.image.amazon_stability1_transformation import AmazonStabilityConfig
 from .llms.bedrock.image.amazon_stability3_transformation import AmazonStability3Config
 from .llms.bedrock.embed.amazon_titan_g1_transformation import AmazonTitanG1Config
--- a/litellm/llms/base_llm/chat/transformation.py
+++ b/litellm/llms/base_llm/chat/transformation.py
@ -19,8 +19,10 @@ import httpx
 from pydantic import BaseModel
 from litellm._logging import verbose_logger
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ModelResponse
 from litellm.utils import CustomStreamWrapper
 from ..base_utils import (
    map_developer_role_to_system_role,
@ -170,6 +172,29 @@ class BaseConfig(ABC):
    ) -> dict:
        pass
    def sign_request(
        self,
        headers: dict,
        optional_params: dict,
        request_data: dict,
        api_base: str,
        stream: Optional[bool] = None,
        fake_stream: Optional[bool] = None,
    ) -> dict:
        """
        Some providers like Bedrock require signing the request. The sign request funtion needs access to `request_data` and `complete_url`
        Args:
            headers: dict
            optional_params: dict
            request_data: dict - the request body being sent in http request
            api_base: str - the complete url being sent in http request
        Returns:
            dict - the signed headers
        Update the headers with the signed headers in this function. The return values will be sent as headers in the http request.
        """
        return headers
    def get_complete_url(
        self,
        api_base: str,
@ -228,6 +253,45 @@ class BaseConfig(ABC):
    ) -> Any:
        pass
    def get_async_custom_stream_wrapper(
        self,
        model: str,
        custom_llm_provider: str,
        logging_obj: LiteLLMLoggingObj,
        api_base: str,
        headers: dict,
        data: dict,
        messages: list,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> CustomStreamWrapper:
        raise NotImplementedError
    def get_sync_custom_stream_wrapper(
        self,
        model: str,
        custom_llm_provider: str,
        logging_obj: LiteLLMLoggingObj,
        api_base: str,
        headers: dict,
        data: dict,
        messages: list,
        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    ) -> CustomStreamWrapper:
        raise NotImplementedError
    @property
    def custom_llm_provider(self) -> Optional[str]:
        return None
    @property
    def has_custom_stream_wrapper(self) -> bool:
        return False
    @property
    def supports_stream_param_in_request_body(self) -> bool:
        """
        Some providers like Bedrock invoke do not support the stream parameter in the request body.
        By default, this is true for almost all providers.
        """
        return True
--- a/litellm/llms/bedrock/base_aws_llm.py
+++ b/litellm/llms/bedrock/base_aws_llm.py
@ -42,6 +42,17 @@ class BaseAWSLLM:
    def __init__(self) -> None:
        self.iam_cache = DualCache()
        super().__init__()
        self.aws_authentication_params = [
            "aws_access_key_id",
            "aws_secret_access_key",
            "aws_session_token",
            "aws_region_name",
            "aws_session_name",
            "aws_profile_name",
            "aws_role_name",
            "aws_web_identity_token",
            "aws_sts_endpoint",
        ]
    def get_cache_key(self, credential_args: Dict[str, Optional[str]]) -> str:
        """
@ -67,17 +78,6 @@ class BaseAWSLLM:
        Return a boto3.Credentials object
        """
        ## CHECK IS  'os.environ/' passed in
        param_names = [
            "aws_access_key_id",
            "aws_secret_access_key",
            "aws_session_token",
            "aws_region_name",
            "aws_session_name",
            "aws_profile_name",
            "aws_role_name",
            "aws_web_identity_token",
            "aws_sts_endpoint",
        ]
        params_to_check: List[Optional[str]] = [
            aws_access_key_id,
            aws_secret_access_key,
@ -97,7 +97,7 @@ class BaseAWSLLM:
                if _v is not None and isinstance(_v, str):
                    params_to_check[i] = _v
            elif param is None:  # check if uppercase value in env
-                key = param_names[i]
+                key = self.aws_authentication_params[i]
                if key.upper() in os.environ:
                    params_to_check[i] = os.getenv(key)
--- a/litellm/llms/bedrock/chat/invoke_handler.py
+++ b/litellm/llms/bedrock/chat/invoke_handler.py
@ -238,6 +238,73 @@ async def make_call(
        raise BedrockError(status_code=500, message=str(e))
 def make_sync_call(
    client: Optional[HTTPHandler],
    api_base: str,
    headers: dict,
    data: str,
    model: str,
    messages: list,
    logging_obj: Logging,
    fake_stream: bool = False,
    json_mode: Optional[bool] = False,
 ):
    try:
        if client is None:
            client = _get_httpx_client(params={})
        response = client.post(
            api_base,
            headers=headers,
            data=data,
            stream=not fake_stream,
            logging_obj=logging_obj,
        )
        if response.status_code != 200:
            raise BedrockError(status_code=response.status_code, message=response.text)
        if fake_stream:
            model_response: (
                ModelResponse
            ) = litellm.AmazonConverseConfig()._transform_response(
                model=model,
                response=response,
                model_response=litellm.ModelResponse(),
                stream=True,
                logging_obj=logging_obj,
                optional_params={},
                api_key="",
                data=data,
                messages=messages,
                print_verbose=print_verbose,
                encoding=litellm.encoding,
            )  # type: ignore
            completion_stream: Any = MockResponseIterator(
                model_response=model_response, json_mode=json_mode
            )
        else:
            decoder = AWSEventStreamDecoder(model=model)
            completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
        # LOGGING
        logging_obj.post_call(
            input=messages,
            api_key="",
            original_response="first stream response received",
            additional_args={"complete_input_dict": data},
        )
        return completion_stream
    except httpx.HTTPStatusError as err:
        error_code = err.response.status_code
        raise BedrockError(status_code=error_code, message=err.response.text)
    except httpx.TimeoutException:
        raise BedrockError(status_code=408, message="Timeout error occurred.")
    except Exception as e:
        raise BedrockError(status_code=500, message=str(e))
 class BedrockLLM(BaseAWSLLM):
    """
    Example call
@ -1034,7 +1101,7 @@ class BedrockLLM(BaseAWSLLM):
                client=client,
                api_base=api_base,
                headers=headers,
-                data=data,
+                data=data,  # type: ignore
                model=model,
                messages=messages,
                logging_obj=logging_obj,
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_ai21_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_ai21_transformation.py
@ -0,0 +1,99 @@
 import types
 from typing import List, Optional
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 class AmazonAI21Config(AmazonInvokeConfig, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
    Supported Params for the Amazon / AI21 models:
    - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
    - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
    - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
    - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
    - `frequencyPenalty` (object): Placeholder for frequency penalty object.
    - `presencePenalty` (object): Placeholder for presence penalty object.
    - `countPenalty` (object): Placeholder for count penalty object.
    """
    maxTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    frequencePenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None
    def __init__(
        self,
        maxTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        frequencePenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        AmazonInvokeConfig.__init__(self)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List:
        return [
            "max_tokens",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["maxTokens"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["topP"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_cohere_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_cohere_transformation.py
@ -0,0 +1,78 @@
 import types
 from typing import List, Optional
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 class AmazonCohereConfig(AmazonInvokeConfig, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command
    Supported Params for the Amazon / Cohere models:
    - `max_tokens` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `return_likelihood` (string) n/a
    """
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    return_likelihood: Optional[str] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        return_likelihood: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        AmazonInvokeConfig.__init__(self)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List[str]:
        return [
            "max_tokens",
            "temperature",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "stream":
                optional_params["stream"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "max_tokens":
                optional_params["max_tokens"] = v
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_llama_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_llama_transformation.py
@ -0,0 +1,80 @@
 import types
 from typing import List, Optional
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 class AmazonLlamaConfig(AmazonInvokeConfig, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
    Supported Params for the Amazon / Meta Llama models:
    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model
    """
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        AmazonInvokeConfig.__init__(self)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List:
        return [
            "max_tokens",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["max_gen_len"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["top_p"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_mistral_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_mistral_transformation.py
@ -0,0 +1,83 @@
 import types
 from typing import List, Optional
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 class AmazonMistralConfig(AmazonInvokeConfig, BaseConfig):
    """
    Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
    Supported Params for the Amazon / Mistral models:
    - `max_tokens` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model
    - `stop` [string] A list of stop sequences that if generated by the model, stops the model from generating further output.
    - `top_k` (float) top k for model
    """
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[float] = None
    stop: Optional[List[str]] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[int] = None,
        top_k: Optional[float] = None,
        stop: Optional[List[str]] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        AmazonInvokeConfig.__init__(self)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List[str]:
        return ["max_tokens", "temperature", "top_p", "stop", "stream"]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["max_tokens"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["top_p"] = v
            if k == "stop":
                optional_params["stop"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/amazon_titan_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/amazon_titan_transformation.py
@ -0,0 +1,116 @@
 import re
 import types
 from typing import List, Optional, Union
 import litellm
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
    AmazonInvokeConfig,
 )
 class AmazonTitanConfig(AmazonInvokeConfig, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1
    Supported Params for the Amazon Titan models:
    - `maxTokenCount` (integer) max tokens,
    - `stopSequences` (string[]) list of stop sequence strings
    - `temperature` (float) temperature for model,
    - `topP` (int) top p for model
    """
    maxTokenCount: Optional[int] = None
    stopSequences: Optional[list] = None
    temperature: Optional[float] = None
    topP: Optional[int] = None
    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        stopSequences: Optional[list] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
        AmazonInvokeConfig.__init__(self)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def _map_and_modify_arg(
        self,
        supported_params: dict,
        provider: str,
        model: str,
        stop: Union[List[str], str],
    ):
        """
        filter params to fit the required provider format, drop those that don't fit if user sets `litellm.drop_params = True`.
        """
        filtered_stop = None
        if "stop" in supported_params and litellm.drop_params:
            if provider == "bedrock" and "amazon" in model:
                filtered_stop = []
                if isinstance(stop, list):
                    for s in stop:
                        if re.match(r"^(\|+|User:)$", s):
                            filtered_stop.append(s)
        if filtered_stop is not None:
            supported_params["stop"] = filtered_stop
        return supported_params
    def get_supported_openai_params(self, model: str) -> List[str]:
        return [
            "max_tokens",
            "max_completion_tokens",
            "stop",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens" or k == "max_completion_tokens":
                optional_params["maxTokenCount"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "stop":
                filtered_stop = self._map_and_modify_arg(
                    {"stop": v}, provider="bedrock", model=model, stop=v
                )
                optional_params["stopSequences"] = filtered_stop["stop"]
            if k == "top_p":
                optional_params["topP"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py
@ -0,0 +1,84 @@
 import types
 from typing import Optional
 import litellm
 class AmazonAnthropicConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
    Supported Params for the Amazon / Anthropic models:
    - `max_tokens_to_sample` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `top_k` (integer) top k,
    - `top_p` (integer) top p,
    - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
    - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
    """
    max_tokens_to_sample: Optional[int] = litellm.max_tokens
    stop_sequences: Optional[list] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_p: Optional[int] = None
    anthropic_version: Optional[str] = None
    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(
        self,
    ):
        return [
            "max_tokens",
            "max_completion_tokens",
            "temperature",
            "stop",
            "top_p",
            "stream",
        ]
    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["max_tokens_to_sample"] = value
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
            if param == "stop":
                optional_params["stop_sequences"] = value
            if param == "stream" and value is True:
                optional_params["stream"] = value
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py
@ -0,0 +1,85 @@
 import types
 from typing import List, Optional
 class AmazonAnthropicClaude3Config:
    """
    Reference:
        https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
        https://docs.anthropic.com/claude/docs/models-overview#model-comparison
    Supported Params for the Amazon / Anthropic Claude 3 models:
    - `max_tokens` Required (integer) max tokens. Default is 4096
    - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
    - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
    - `temperature` Optional (float) The amount of randomness injected into the response
    - `top_p` Optional (float) Use nucleus sampling.
    - `top_k` Optional (int) Only sample from the top K options for each subsequent token
    - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
    """
    max_tokens: Optional[int] = 4096  # Opus, Sonnet, and Haiku default
    anthropic_version: Optional[str] = "bedrock-2023-05-31"
    system: Optional[str] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    stop_sequences: Optional[List[str]] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self):
        return [
            "max_tokens",
            "max_completion_tokens",
            "tools",
            "tool_choice",
            "stream",
            "stop",
            "temperature",
            "top_p",
            "extra_headers",
        ]
    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["max_tokens"] = value
            if param == "tools":
                optional_params["tools"] = value
            if param == "stream":
                optional_params["stream"] = value
            if param == "stop":
                optional_params["stop_sequences"] = value
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
        return optional_params
--- a/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py
+++ b/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py
@ -0,0 +1,738 @@
 import copy
 import json
 import time
 import urllib.parse
 import uuid
 from functools import partial
 from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, cast, get_args
 import httpx
 import litellm
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.litellm_core_utils.prompt_templates.factory import (
    cohere_message_pt,
    construct_tool_use_system_prompt,
    contains_tag,
    custom_prompt,
    extract_between_tags,
    parse_xml_params,
    prompt_factory,
 )
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.llms.bedrock.chat.invoke_handler import make_call, make_sync_call
 from litellm.llms.bedrock.common_utils import BedrockError
 from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
    _get_httpx_client,
 )
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ModelResponse, Usage
 from litellm.utils import CustomStreamWrapper, get_secret
 if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
    LiteLLMLoggingObj = _LiteLLMLoggingObj
 else:
    LiteLLMLoggingObj = Any
 from litellm.llms.bedrock.base_aws_llm import BaseAWSLLM
 class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
    def __init__(self, **kwargs):
        BaseConfig.__init__(self, **kwargs)
        BaseAWSLLM.__init__(self, **kwargs)
    def get_supported_openai_params(self, model: str) -> List[str]:
        """
        This is a base invoke model mapping. For Invoke - define a bedrock provider specific config that extends this class.
        """
        return [
            "max_tokens",
            "max_completion_tokens",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        This is a base invoke model mapping. For Invoke - define a bedrock provider specific config that extends this class.
        """
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["max_tokens"] = value
            if param == "stream":
                optional_params["stream"] = value
        return optional_params
    def get_complete_url(
        self,
        api_base: str,
        model: str,
        optional_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Get the complete url for the request
        """
        provider = self.get_bedrock_invoke_provider(model)
        modelId = self.get_bedrock_model_id(
            model=model,
            provider=provider,
            optional_params=optional_params,
        )
        ### SET RUNTIME ENDPOINT ###
        aws_bedrock_runtime_endpoint = optional_params.pop(
            "aws_bedrock_runtime_endpoint", None
        )  # https://bedrock-runtime.{region_name}.amazonaws.com
        endpoint_url, proxy_endpoint_url = self.get_runtime_endpoint(
            api_base=api_base,
            aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
            aws_region_name=self._get_aws_region_name(optional_params=optional_params),
        )
        if (stream is not None and stream is True) and provider != "ai21":
            endpoint_url = f"{endpoint_url}/model/{modelId}/invoke-with-response-stream"
            proxy_endpoint_url = (
                f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
            )
        else:
            endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
            proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke"
        return endpoint_url
    def sign_request(
        self,
        headers: dict,
        optional_params: dict,
        request_data: dict,
        api_base: str,
        stream: Optional[bool] = None,
        fake_stream: Optional[bool] = None,
    ) -> dict:
        try:
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
            from botocore.credentials import Credentials
        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
        ## CREDENTIALS ##
        # pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
        extra_headers = optional_params.pop("extra_headers", None)
        aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
        aws_access_key_id = optional_params.pop("aws_access_key_id", None)
        aws_session_token = optional_params.pop("aws_session_token", None)
        aws_role_name = optional_params.pop("aws_role_name", None)
        aws_session_name = optional_params.pop("aws_session_name", None)
        aws_profile_name = optional_params.pop("aws_profile_name", None)
        aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
        aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)
        aws_region_name = self._get_aws_region_name(optional_params)
        credentials: Credentials = self.get_credentials(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=aws_session_token,
            aws_region_name=aws_region_name,
            aws_session_name=aws_session_name,
            aws_profile_name=aws_profile_name,
            aws_role_name=aws_role_name,
            aws_web_identity_token=aws_web_identity_token,
            aws_sts_endpoint=aws_sts_endpoint,
        )
        sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
        headers = {"Content-Type": "application/json"}
        if extra_headers is not None:
            headers = {"Content-Type": "application/json", **extra_headers}
        request = AWSRequest(
            method="POST",
            url=api_base,
            data=json.dumps(request_data),
            headers=headers,
        )
        sigv4.add_auth(request)
        if (
            extra_headers is not None and "Authorization" in extra_headers
        ):  # prevent sigv4 from overwriting the auth header
            request.headers["Authorization"] = extra_headers["Authorization"]
        return dict(request.headers)
    def transform_request(  # noqa: PLR0915
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        ## SETUP ##
        stream = optional_params.pop("stream", None)
        custom_prompt_dict: dict = litellm_params.pop("custom_prompt_dict", None) or {}
        provider = self.get_bedrock_invoke_provider(model)
        prompt, chat_history = self.convert_messages_to_prompt(
            model, messages, provider, custom_prompt_dict
        )
        inference_params = copy.deepcopy(optional_params)
        inference_params = {
            k: v
            for k, v in inference_params.items()
            if k not in self.aws_authentication_params
        }
        json_schemas: dict = {}
        request_data: dict = {}
        if provider == "cohere":
            if model.startswith("cohere.command-r"):
                ## LOAD CONFIG
                config = litellm.AmazonCohereChatConfig().get_config()
                for k, v in config.items():
                    if (
                        k not in inference_params
                    ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                        inference_params[k] = v
                _data = {"message": prompt, **inference_params}
                if chat_history is not None:
                    _data["chat_history"] = chat_history
                request_data = _data
            else:
                ## LOAD CONFIG
                config = litellm.AmazonCohereConfig.get_config()
                for k, v in config.items():
                    if (
                        k not in inference_params
                    ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                        inference_params[k] = v
                if stream is True:
                    inference_params["stream"] = (
                        True  # cohere requires stream = True in inference params
                    )
                request_data = {"prompt": prompt, **inference_params}
        elif provider == "anthropic":
            if model.startswith("anthropic.claude-3"):
                # Separate system prompt from rest of message
                system_prompt_idx: list[int] = []
                system_messages: list[str] = []
                for idx, message in enumerate(messages):
                    if message["role"] == "system" and isinstance(
                        message["content"], str
                    ):
                        system_messages.append(message["content"])
                        system_prompt_idx.append(idx)
                if len(system_prompt_idx) > 0:
                    inference_params["system"] = "\n".join(system_messages)
                    messages = [
                        i for j, i in enumerate(messages) if j not in system_prompt_idx
                    ]
                # Format rest of message according to anthropic guidelines
                messages = prompt_factory(
                    model=model, messages=messages, custom_llm_provider="anthropic_xml"
                )  # type: ignore
                ## LOAD CONFIG
                config = litellm.AmazonAnthropicClaude3Config.get_config()
                for k, v in config.items():
                    if (
                        k not in inference_params
                    ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                        inference_params[k] = v
                ## Handle Tool Calling
                if "tools" in inference_params:
                    _is_function_call = True
                    for tool in inference_params["tools"]:
                        json_schemas[tool["function"]["name"]] = tool["function"].get(
                            "parameters", None
                        )
                    tool_calling_system_prompt = construct_tool_use_system_prompt(
                        tools=inference_params["tools"]
                    )
                    inference_params["system"] = (
                        inference_params.get("system", "\n")
                        + tool_calling_system_prompt
                    )  # add the anthropic tool calling prompt to the system prompt
                    inference_params.pop("tools")
                request_data = {"messages": messages, **inference_params}
            else:
                ## LOAD CONFIG
                config = litellm.AmazonAnthropicConfig.get_config()
                for k, v in config.items():
                    if (
                        k not in inference_params
                    ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                        inference_params[k] = v
                request_data = {"prompt": prompt, **inference_params}
        elif provider == "ai21":
            ## LOAD CONFIG
            config = litellm.AmazonAI21Config.get_config()
            for k, v in config.items():
                if (
                    k not in inference_params
                ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            request_data = {"prompt": prompt, **inference_params}
        elif provider == "mistral":
            ## LOAD CONFIG
            config = litellm.AmazonMistralConfig.get_config()
            for k, v in config.items():
                if (
                    k not in inference_params
                ):  # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            request_data = {"prompt": prompt, **inference_params}
        elif provider == "amazon":  # amazon titan
            ## LOAD CONFIG
            config = litellm.AmazonTitanConfig.get_config()
            for k, v in config.items():
                if (
                    k not in inference_params
                ):  # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            request_data = {
                "inputText": prompt,
                "textGenerationConfig": inference_params,
            }
        elif provider == "meta" or provider == "llama":
            ## LOAD CONFIG
            config = litellm.AmazonLlamaConfig.get_config()
            for k, v in config.items():
                if (
                    k not in inference_params
                ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                    inference_params[k] = v
            request_data = {"prompt": prompt, **inference_params}
        else:
            raise BedrockError(
                status_code=404,
                message="Bedrock Invoke HTTPX: Unknown provider={}, model={}. Try calling via converse route - `bedrock/converse/<model>`.".format(
                    provider, model
                ),
            )
        return request_data
    def transform_response(  # noqa: PLR0915
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        try:
            completion_response = raw_response.json()
        except Exception:
            raise BedrockError(
                message=raw_response.text, status_code=raw_response.status_code
            )
        provider = self.get_bedrock_invoke_provider(model)
        outputText: Optional[str] = None
        try:
            if provider == "cohere":
                if "text" in completion_response:
                    outputText = completion_response["text"]  # type: ignore
                elif "generations" in completion_response:
                    outputText = completion_response["generations"][0]["text"]
                    model_response.choices[0].finish_reason = map_finish_reason(
                        completion_response["generations"][0]["finish_reason"]
                    )
            elif provider == "anthropic":
                if model.startswith("anthropic.claude-3"):
                    json_schemas: dict = {}
                    _is_function_call = False
                    ## Handle Tool Calling
                    if "tools" in optional_params:
                        _is_function_call = True
                        for tool in optional_params["tools"]:
                            json_schemas[tool["function"]["name"]] = tool[
                                "function"
                            ].get("parameters", None)
                    outputText = completion_response.get("content")[0].get("text", None)
                    if outputText is not None and contains_tag(
                        "invoke", outputText
                    ):  # OUTPUT PARSE FUNCTION CALL
                        function_name = extract_between_tags("tool_name", outputText)[0]
                        function_arguments_str = extract_between_tags(
                            "invoke", outputText
                        )[0].strip()
                        function_arguments_str = (
                            f"<invoke>{function_arguments_str}</invoke>"
                        )
                        function_arguments = parse_xml_params(
                            function_arguments_str,
                            json_schema=json_schemas.get(
                                function_name, None
                            ),  # check if we have a json schema for this function name)
                        )
                        _message = litellm.Message(
                            tool_calls=[
                                {
                                    "id": f"call_{uuid.uuid4()}",
                                    "type": "function",
                                    "function": {
                                        "name": function_name,
                                        "arguments": json.dumps(function_arguments),
                                    },
                                }
                            ],
                            content=None,
                        )
                        model_response.choices[0].message = _message  # type: ignore
                        model_response._hidden_params["original_response"] = (
                            outputText  # allow user to access raw anthropic tool calling response
                        )
                    model_response.choices[0].finish_reason = map_finish_reason(
                        completion_response.get("stop_reason", "")
                    )
                    _usage = litellm.Usage(
                        prompt_tokens=completion_response["usage"]["input_tokens"],
                        completion_tokens=completion_response["usage"]["output_tokens"],
                        total_tokens=completion_response["usage"]["input_tokens"]
                        + completion_response["usage"]["output_tokens"],
                    )
                    setattr(model_response, "usage", _usage)
                else:
                    outputText = completion_response["completion"]
                    model_response.choices[0].finish_reason = completion_response[
                        "stop_reason"
                    ]
            elif provider == "ai21":
                outputText = (
                    completion_response.get("completions")[0].get("data").get("text")
                )
            elif provider == "meta" or provider == "llama":
                outputText = completion_response["generation"]
            elif provider == "mistral":
                outputText = completion_response["outputs"][0]["text"]
                model_response.choices[0].finish_reason = completion_response[
                    "outputs"
                ][0]["stop_reason"]
            else:  # amazon titan
                outputText = completion_response.get("results")[0].get("outputText")
        except Exception as e:
            raise BedrockError(
                message="Error processing={}, Received error={}".format(
                    raw_response.text, str(e)
                ),
                status_code=422,
            )
        try:
            if (
                outputText is not None
                and len(outputText) > 0
                and hasattr(model_response.choices[0], "message")
                and getattr(model_response.choices[0].message, "tool_calls", None)  # type: ignore
                is None
            ):
                model_response.choices[0].message.content = outputText  # type: ignore
            elif (
                hasattr(model_response.choices[0], "message")
                and getattr(model_response.choices[0].message, "tool_calls", None)  # type: ignore
                is not None
            ):
                pass
            else:
                raise Exception()
        except Exception as e:
            raise BedrockError(
                message="Error parsing received text={}.\nError-{}".format(
                    outputText, str(e)
                ),
                status_code=raw_response.status_code,
            )
        ## CALCULATING USAGE - bedrock returns usage in the headers
        bedrock_input_tokens = raw_response.headers.get(
            "x-amzn-bedrock-input-token-count", None
        )
        bedrock_output_tokens = raw_response.headers.get(
            "x-amzn-bedrock-output-token-count", None
        )
        prompt_tokens = int(
            bedrock_input_tokens or litellm.token_counter(messages=messages)
        )
        completion_tokens = int(
            bedrock_output_tokens
            or litellm.token_counter(
                text=model_response.choices[0].message.content,  # type: ignore
                count_response_tokens=True,
            )
        )
        model_response.created = int(time.time())
        model_response.model = model
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        setattr(model_response, "usage", usage)
        return model_response
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        return {}
    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return BedrockError(status_code=status_code, message=error_message)
    @track_llm_api_timing()
    def get_async_custom_stream_wrapper(
        self,
        model: str,
        custom_llm_provider: str,
        logging_obj: LiteLLMLoggingObj,
        api_base: str,
        headers: dict,
        data: dict,
        messages: list,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> CustomStreamWrapper:
        streaming_response = CustomStreamWrapper(
            completion_stream=None,
            make_call=partial(
                make_call,
                client=client,
                api_base=api_base,
                headers=headers,
                data=json.dumps(data),
                model=model,
                messages=messages,
                logging_obj=logging_obj,
                fake_stream=True if "ai21" in api_base else False,
            ),
            model=model,
            custom_llm_provider="bedrock",
            logging_obj=logging_obj,
        )
        return streaming_response
    @track_llm_api_timing()
    def get_sync_custom_stream_wrapper(
        self,
        model: str,
        custom_llm_provider: str,
        logging_obj: LiteLLMLoggingObj,
        api_base: str,
        headers: dict,
        data: dict,
        messages: list,
        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    ) -> CustomStreamWrapper:
        if client is None or isinstance(client, AsyncHTTPHandler):
            client = _get_httpx_client(params={})
        streaming_response = CustomStreamWrapper(
            completion_stream=None,
            make_call=partial(
                make_sync_call,
                client=client,
                api_base=api_base,
                headers=headers,
                data=json.dumps(data),
                model=model,
                messages=messages,
                logging_obj=logging_obj,
                fake_stream=True if "ai21" in api_base else False,
            ),
            model=model,
            custom_llm_provider="bedrock",
            logging_obj=logging_obj,
        )
        return streaming_response
    @property
    def has_custom_stream_wrapper(self) -> bool:
        return True
    @property
    def supports_stream_param_in_request_body(self) -> bool:
        """
        Bedrock invoke does not allow passing `stream` in the request body.
        """
        return False
    @staticmethod
    def get_bedrock_invoke_provider(
        model: str,
    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
        """
        Helper function to get the bedrock provider from the model
        handles 2 scenarions:
        1. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
        2. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
        """
        _split_model = model.split(".")[0]
        if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
            return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
        # If not a known provider, check for pattern with two slashes
        provider = AmazonInvokeConfig._get_provider_from_model_path(model)
        if provider is not None:
            return provider
        return None
    @staticmethod
    def _get_provider_from_model_path(
        model_path: str,
    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
        """
        Helper function to get the provider from a model path with format: provider/model-name
        Args:
            model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
        Returns:
            Optional[str]: The provider name, or None if no valid provider found
        """
        parts = model_path.split("/")
        if len(parts) >= 1:
            provider = parts[0]
            if provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
                return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
        return None
    def get_bedrock_model_id(
        self,
        optional_params: dict,
        provider: Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL],
        model: str,
    ) -> str:
        modelId = optional_params.pop("model_id", None)
        if modelId is not None:
            modelId = self.encode_model_id(model_id=modelId)
        else:
            modelId = model
        if provider == "llama" and "llama/" in modelId:
            modelId = self._get_model_id_for_llama_like_model(modelId)
        return modelId
    def _get_aws_region_name(self, optional_params: dict) -> str:
        """
        Get the AWS region name from the environment variables
        """
        aws_region_name = optional_params.pop("aws_region_name", None)
        ### SET REGION NAME ###
        if aws_region_name is None:
            # check env #
            litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
            if litellm_aws_region_name is not None and isinstance(
                litellm_aws_region_name, str
            ):
                aws_region_name = litellm_aws_region_name
            standard_aws_region_name = get_secret("AWS_REGION", None)
            if standard_aws_region_name is not None and isinstance(
                standard_aws_region_name, str
            ):
                aws_region_name = standard_aws_region_name
        if aws_region_name is None:
            aws_region_name = "us-west-2"
        return aws_region_name
    def _get_model_id_for_llama_like_model(
        self,
        model: str,
    ) -> str:
        """
        Remove `llama` from modelID since `llama` is simply a spec to follow for custom bedrock models
        """
        model_id = model.replace("llama/", "")
        return self.encode_model_id(model_id=model_id)
    def encode_model_id(self, model_id: str) -> str:
        """
        Double encode the model ID to ensure it matches the expected double-encoded format.
        Args:
            model_id (str): The model ID to encode.
        Returns:
            str: The double-encoded model ID.
        """
        return urllib.parse.quote(model_id, safe="")
    def convert_messages_to_prompt(
        self, model, messages, provider, custom_prompt_dict
    ) -> Tuple[str, Optional[list]]:
        # handle anthropic prompts and amazon titan prompts
        prompt = ""
        chat_history: Optional[list] = None
        ## CUSTOM PROMPT
        if model in custom_prompt_dict:
            # check if the model has a registered custom prompt
            model_prompt_details = custom_prompt_dict[model]
            prompt = custom_prompt(
                role_dict=model_prompt_details["roles"],
                initial_prompt_value=model_prompt_details.get(
                    "initial_prompt_value", ""
                ),
                final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
                messages=messages,
            )
            return prompt, None
        ## ELSE
        if provider == "anthropic" or provider == "amazon":
            prompt = prompt_factory(
                model=model, messages=messages, custom_llm_provider="bedrock"
            )
        elif provider == "mistral":
            prompt = prompt_factory(
                model=model, messages=messages, custom_llm_provider="bedrock"
            )
        elif provider == "meta" or provider == "llama":
            prompt = prompt_factory(
                model=model, messages=messages, custom_llm_provider="bedrock"
            )
        elif provider == "cohere":
            prompt, chat_history = cohere_message_pt(messages=messages)
        else:
            prompt = ""
            for message in messages:
                if "role" in message:
                    if message["role"] == "user":
                        prompt += f"{message['content']}"
                    else:
                        prompt += f"{message['content']}"
                else:
                    prompt += f"{message['content']}"
        return prompt, chat_history  # type: ignore
--- a/litellm/llms/bedrock/common_utils.py
+++ b/litellm/llms/bedrock/common_utils.py
@ -3,22 +3,13 @@ Common utilities used across bedrock chat/embedding/image generation
 """
 import os
-import re
+from typing import List, Optional, Union
 import types
 from enum import Enum
 from typing import Any, List, Optional, Union
 import httpx
 import litellm
-from litellm.llms.base_llm.chat.transformation import (
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
    BaseConfig,
    BaseLLMException,
    LiteLLMLoggingObj,
 )
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ModelResponse
 class BedrockError(BaseLLMException):
@ -84,642 +75,6 @@ class AmazonBedrockGlobalConfig:
        ]
 class AmazonInvokeMixin:
    """
    Base class for bedrock models going through invoke_handler.py
    """
    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return BedrockError(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )
    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        raise NotImplementedError(
            "transform_request not implemented for config. Done in invoke_handler.py"
        )
    def transform_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        raise NotImplementedError(
            "transform_response not implemented for config. Done in invoke_handler.py"
        )
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        raise NotImplementedError(
            "validate_environment not implemented for config. Done in invoke_handler.py"
        )
 class AmazonTitanConfig(AmazonInvokeMixin, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1
    Supported Params for the Amazon Titan models:
    - `maxTokenCount` (integer) max tokens,
    - `stopSequences` (string[]) list of stop sequence strings
    - `temperature` (float) temperature for model,
    - `topP` (int) top p for model
    """
    maxTokenCount: Optional[int] = None
    stopSequences: Optional[list] = None
    temperature: Optional[float] = None
    topP: Optional[int] = None
    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        stopSequences: Optional[list] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def _map_and_modify_arg(
        self,
        supported_params: dict,
        provider: str,
        model: str,
        stop: Union[List[str], str],
    ):
        """
        filter params to fit the required provider format, drop those that don't fit if user sets `litellm.drop_params = True`.
        """
        filtered_stop = None
        if "stop" in supported_params and litellm.drop_params:
            if provider == "bedrock" and "amazon" in model:
                filtered_stop = []
                if isinstance(stop, list):
                    for s in stop:
                        if re.match(r"^(\|+|User:)$", s):
                            filtered_stop.append(s)
        if filtered_stop is not None:
            supported_params["stop"] = filtered_stop
        return supported_params
    def get_supported_openai_params(self, model: str) -> List[str]:
        return [
            "max_tokens",
            "max_completion_tokens",
            "stop",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens" or k == "max_completion_tokens":
                optional_params["maxTokenCount"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "stop":
                filtered_stop = self._map_and_modify_arg(
                    {"stop": v}, provider="bedrock", model=model, stop=v
                )
                optional_params["stopSequences"] = filtered_stop["stop"]
            if k == "top_p":
                optional_params["topP"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
 class AmazonAnthropicClaude3Config:
    """
    Reference:
        https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
        https://docs.anthropic.com/claude/docs/models-overview#model-comparison
    Supported Params for the Amazon / Anthropic Claude 3 models:
    - `max_tokens` Required (integer) max tokens. Default is 4096
    - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
    - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
    - `temperature` Optional (float) The amount of randomness injected into the response
    - `top_p` Optional (float) Use nucleus sampling.
    - `top_k` Optional (int) Only sample from the top K options for each subsequent token
    - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
    """
    max_tokens: Optional[int] = 4096  # Opus, Sonnet, and Haiku default
    anthropic_version: Optional[str] = "bedrock-2023-05-31"
    system: Optional[str] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    stop_sequences: Optional[List[str]] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self):
        return [
            "max_tokens",
            "max_completion_tokens",
            "tools",
            "tool_choice",
            "stream",
            "stop",
            "temperature",
            "top_p",
            "extra_headers",
        ]
    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["max_tokens"] = value
            if param == "tools":
                optional_params["tools"] = value
            if param == "stream":
                optional_params["stream"] = value
            if param == "stop":
                optional_params["stop_sequences"] = value
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
        return optional_params
 class AmazonAnthropicConfig:
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
    Supported Params for the Amazon / Anthropic models:
    - `max_tokens_to_sample` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `top_k` (integer) top k,
    - `top_p` (integer) top p,
    - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
    - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
    """
    max_tokens_to_sample: Optional[int] = litellm.max_tokens
    stop_sequences: Optional[list] = None
    temperature: Optional[float] = None
    top_k: Optional[int] = None
    top_p: Optional[int] = None
    anthropic_version: Optional[str] = None
    def __init__(
        self,
        max_tokens_to_sample: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[int] = None,
        anthropic_version: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(
        self,
    ):
        return [
            "max_tokens",
            "max_completion_tokens",
            "temperature",
            "stop",
            "top_p",
            "stream",
        ]
    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["max_tokens_to_sample"] = value
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
            if param == "stop":
                optional_params["stop_sequences"] = value
            if param == "stream" and value is True:
                optional_params["stream"] = value
        return optional_params
 class AmazonCohereConfig(AmazonInvokeMixin, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command
    Supported Params for the Amazon / Cohere models:
    - `max_tokens` (integer) max tokens,
    - `temperature` (float) model temperature,
    - `return_likelihood` (string) n/a
    """
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    return_likelihood: Optional[str] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        return_likelihood: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List[str]:
        return [
            "max_tokens",
            "temperature",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "stream":
                optional_params["stream"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "max_tokens":
                optional_params["max_tokens"] = v
        return optional_params
 class AmazonAI21Config(AmazonInvokeMixin, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
    Supported Params for the Amazon / AI21 models:
    - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
    - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
    - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
    - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
    - `frequencyPenalty` (object): Placeholder for frequency penalty object.
    - `presencePenalty` (object): Placeholder for presence penalty object.
    - `countPenalty` (object): Placeholder for count penalty object.
    """
    maxTokens: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    stopSequences: Optional[list] = None
    frequencePenalty: Optional[dict] = None
    presencePenalty: Optional[dict] = None
    countPenalty: Optional[dict] = None
    def __init__(
        self,
        maxTokens: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[float] = None,
        stopSequences: Optional[list] = None,
        frequencePenalty: Optional[dict] = None,
        presencePenalty: Optional[dict] = None,
        countPenalty: Optional[dict] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List:
        return [
            "max_tokens",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["maxTokens"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["topP"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
 class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman: "
    AI_PROMPT = "\n\nAssistant: "
 class AmazonLlamaConfig(AmazonInvokeMixin, BaseConfig):
    """
    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
    Supported Params for the Amazon / Meta Llama models:
    - `max_gen_len` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model
    """
    max_gen_len: Optional[int] = None
    temperature: Optional[float] = None
    topP: Optional[float] = None
    def __init__(
        self,
        maxTokenCount: Optional[int] = None,
        temperature: Optional[float] = None,
        topP: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List:
        return [
            "max_tokens",
            "temperature",
            "top_p",
            "stream",
        ]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["max_gen_len"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["top_p"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
 class AmazonMistralConfig(AmazonInvokeMixin, BaseConfig):
    """
    Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
    Supported Params for the Amazon / Mistral models:
    - `max_tokens` (integer) max tokens,
    - `temperature` (float) temperature for model,
    - `top_p` (float) top p for model
    - `stop` [string] A list of stop sequences that if generated by the model, stops the model from generating further output.
    - `top_k` (float) top k for model
    """
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[float] = None
    stop: Optional[List[str]] = None
    def __init__(
        self,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[int] = None,
        top_k: Optional[float] = None,
        stop: Optional[List[str]] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not k.startswith("_abc")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self, model: str) -> List[str]:
        return ["max_tokens", "temperature", "top_p", "stop", "stream"]
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "max_tokens":
                optional_params["max_tokens"] = v
            if k == "temperature":
                optional_params["temperature"] = v
            if k == "top_p":
                optional_params["top_p"] = v
            if k == "stop":
                optional_params["stop"] = v
            if k == "stream":
                optional_params["stream"] = v
        return optional_params
 def add_custom_header(headers):
    """Closure to capture the headers and add them."""
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@ -40,6 +40,7 @@ class BaseLLMHTTPHandler:
        data: dict,
        timeout: Union[float, httpx.Timeout],
        litellm_params: dict,
        logging_obj: LiteLLMLoggingObj,
        stream: bool = False,
    ) -> httpx.Response:
        """Common implementation across stream + non-stream calls. Meant to ensure consistent error-handling."""
@ -56,6 +57,7 @@ class BaseLLMHTTPHandler:
                    data=json.dumps(data),
                    timeout=timeout,
                    stream=stream,
                    logging_obj=logging_obj,
                )
            except httpx.HTTPStatusError as e:
                hit_max_retry = i + 1 == max_retry_on_unprocessable_entity_error
@ -93,6 +95,7 @@ class BaseLLMHTTPHandler:
        data: dict,
        timeout: Union[float, httpx.Timeout],
        litellm_params: dict,
        logging_obj: LiteLLMLoggingObj,
        stream: bool = False,
    ) -> httpx.Response:
@ -110,6 +113,7 @@ class BaseLLMHTTPHandler:
                    data=json.dumps(data),
                    timeout=timeout,
                    stream=stream,
                    logging_obj=logging_obj,
                )
            except httpx.HTTPStatusError as e:
                hit_max_retry = i + 1 == max_retry_on_unprocessable_entity_error
@ -173,6 +177,7 @@ class BaseLLMHTTPHandler:
            timeout=timeout,
            litellm_params=litellm_params,
            stream=False,
            logging_obj=logging_obj,
        )
        return provider_config.transform_response(
            model=model,
@ -235,6 +240,15 @@ class BaseLLMHTTPHandler:
            headers=headers,
        )
        headers = provider_config.sign_request(
            headers=headers,
            optional_params=optional_params,
            request_data=data,
            api_base=api_base,
            stream=stream,
            fake_stream=fake_stream,
        )
        ## LOGGING
        logging_obj.pre_call(
            input=messages,
@ -248,8 +262,11 @@ class BaseLLMHTTPHandler:
        if acompletion is True:
            if stream is True:
-                if fake_stream is not True:
+                data = self._add_stream_param_to_request_body(
-                    data["stream"] = stream
+                    data=data,
                    provider_config=provider_config,
                    fake_stream=fake_stream,
                )
                return self.acompletion_stream_function(
                    model=model,
                    messages=messages,
@ -293,8 +310,22 @@ class BaseLLMHTTPHandler:
                )
        if stream is True:
-            if fake_stream is not True:
+            data = self._add_stream_param_to_request_body(
-                data["stream"] = stream
+                data=data,
                provider_config=provider_config,
                fake_stream=fake_stream,
            )
            if provider_config.has_custom_stream_wrapper is True:
                return provider_config.get_sync_custom_stream_wrapper(
                    model=model,
                    custom_llm_provider=custom_llm_provider,
                    logging_obj=logging_obj,
                    api_base=api_base,
                    headers=headers,
                    data=data,
                    messages=messages,
                    client=client,
                )
            completion_stream, headers = self.make_sync_call(
                provider_config=provider_config,
                api_base=api_base,
@ -334,6 +365,7 @@ class BaseLLMHTTPHandler:
            data=data,
            timeout=timeout,
            litellm_params=litellm_params,
            logging_obj=logging_obj,
        )
        return provider_config.transform_response(
            model=model,
@ -383,6 +415,7 @@ class BaseLLMHTTPHandler:
            timeout=timeout,
            litellm_params=litellm_params,
            stream=stream,
            logging_obj=logging_obj,
        )
        if fake_stream is True:
@ -419,6 +452,18 @@ class BaseLLMHTTPHandler:
        fake_stream: bool = False,
        client: Optional[AsyncHTTPHandler] = None,
    ):
        if provider_config.has_custom_stream_wrapper is True:
            return provider_config.get_async_custom_stream_wrapper(
                model=model,
                custom_llm_provider=custom_llm_provider,
                logging_obj=logging_obj,
                api_base=api_base,
                headers=headers,
                data=data,
                messages=messages,
                client=client,
            )
        completion_stream, _response_headers = await self.make_async_call_stream_helper(
            custom_llm_provider=custom_llm_provider,
            provider_config=provider_config,
@ -479,6 +524,7 @@ class BaseLLMHTTPHandler:
            timeout=timeout,
            litellm_params=litellm_params,
            stream=stream,
            logging_obj=logging_obj,
        )
        if fake_stream is True:
@ -499,6 +545,21 @@ class BaseLLMHTTPHandler:
        return completion_stream, response.headers
    def _add_stream_param_to_request_body(
        self,
        data: dict,
        provider_config: BaseConfig,
        fake_stream: bool,
    ) -> dict:
        """
        Some providers like Bedrock invoke do not support the stream parameter in the request body, we only pass `stream` in the request body the provider supports it.
        """
        if fake_stream is True:
            return data
        if provider_config.supports_stream_param_in_request_body is True:
            data["stream"] = True
        return data
    def embedding(
        self,
        model: str,
--- a/litellm/main.py
+++ b/litellm/main.py
@ -2669,35 +2669,23 @@ def completion(  # type: ignore # noqa: PLR0915
                    client=client,
                )
            else:
-                model = model.replace("invoke/", "")
+                response = base_llm_http_handler.completion(
                response = bedrock_chat_completion.completion(
                    model=model,
                    stream=stream,
                    messages=messages,
-                    custom_prompt_dict=custom_prompt_dict,
+                    acompletion=acompletion,
                    api_base=api_base,
                    model_response=model_response,
                    print_verbose=print_verbose,
                    optional_params=optional_params,
                    litellm_params=litellm_params,
-                    logger_fn=logger_fn,
+                    custom_llm_provider="bedrock",
                    encoding=encoding,
                    logging_obj=logging,
                    extra_headers=extra_headers,
                    timeout=timeout,
-                    acompletion=acompletion,
+                    headers=headers,
                    encoding=encoding,
                    api_key=api_key,
                    logging_obj=logging,
                    client=client,
                    api_base=api_base,
                )
            if optional_params.get("stream", False):
                ## LOGGING
                logging.post_call(
                    input=messages,
                    api_key=None,
                    original_response=response,
                )
            ## RESPONSE OBJECT
            response = response
        elif custom_llm_provider == "watsonx":
            response = watsonx_chat_completion.completion(
                model=model,
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -5749,8 +5749,7 @@
        "input_cost_per_token": 0.0000125,
        "output_cost_per_token": 0.0000125,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.j2-ultra-v1": {
        "max_tokens": 8191, 
@ -5759,8 +5758,7 @@
        "input_cost_per_token": 0.0000188,
        "output_cost_per_token": 0.0000188,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.jamba-instruct-v1:0": {
        "max_tokens": 4096,
@ -5779,8 +5777,7 @@
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000008,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.jamba-1-5-mini-v1:0": {
        "max_tokens": 256000,
@ -5789,8 +5786,7 @@
        "input_cost_per_token": 0.0000002,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "amazon.titan-text-lite-v1": {
        "max_tokens": 4000, 
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -6077,6 +6077,8 @@ class ProviderConfigManager:
                return litellm.AmazonCohereConfig()
            elif bedrock_provider == "mistral":  # mistral models on bedrock
                return litellm.AmazonMistralConfig()
            else:
                return litellm.AmazonInvokeConfig()
        return litellm.OpenAIGPTConfig()
    @staticmethod
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -5749,8 +5749,7 @@
        "input_cost_per_token": 0.0000125,
        "output_cost_per_token": 0.0000125,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.j2-ultra-v1": {
        "max_tokens": 8191, 
@ -5759,8 +5758,7 @@
        "input_cost_per_token": 0.0000188,
        "output_cost_per_token": 0.0000188,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.jamba-instruct-v1:0": {
        "max_tokens": 4096,
@ -5779,8 +5777,7 @@
        "input_cost_per_token": 0.000002,
        "output_cost_per_token": 0.000008,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "ai21.jamba-1-5-mini-v1:0": {
        "max_tokens": 256000,
@ -5789,8 +5786,7 @@
        "input_cost_per_token": 0.0000002,
        "output_cost_per_token": 0.0000004,
        "litellm_provider": "bedrock",
-        "mode": "chat",
+        "mode": "chat"
        "supports_tool_choice": true
    },
    "amazon.titan-text-lite-v1": {
        "max_tokens": 4000, 
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@ -886,8 +886,11 @@ def test_completion_claude_3_base64():
 def test_completion_bedrock_mistral_completion_auth():
    print("calling bedrock mistral completion params auth")
    import os
    litellm._turn_on_debug()
    # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    # aws_region_name = os.environ["AWS_REGION_NAME"]
@ -902,6 +905,7 @@ def test_completion_bedrock_mistral_completion_auth():
            temperature=0.1,
        )  # type: ignore
        # Add any assertions here to check the response
        print(f"response: {response}")
        assert len(response.choices) > 0
        assert len(response.choices[0].message.content) > 0
@ -2581,28 +2585,35 @@ def test_bedrock_custom_deepseek():
        except Exception as e:
            print(f"Error: {str(e)}")
            raise e
-        
+
@pytest.mark.parametrize(
-    "model, expected_output", 
+    "model, expected_output",
    [
        ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", {"top_k": 3}),
-        ("bedrock/converse/us.amazon.nova-pro-v1:0", {'inferenceConfig': {"topK": 3}}),
+        ("bedrock/converse/us.amazon.nova-pro-v1:0", {"inferenceConfig": {"topK": 3}}),
        ("bedrock/meta.llama3-70b-instruct-v1:0", {}),
-    ]
+    ],
 )
 def test_handle_top_k_value_helper(model, expected_output):
-    assert litellm.AmazonConverseConfig()._handle_top_k_value(model, {"topK": 3}) == expected_output
+    assert (
-    assert litellm.AmazonConverseConfig()._handle_top_k_value(model, {"top_k": 3}) == expected_output
+        litellm.AmazonConverseConfig()._handle_top_k_value(model, {"topK": 3})
        == expected_output
    )
    assert (
        litellm.AmazonConverseConfig()._handle_top_k_value(model, {"top_k": 3})
        == expected_output
    )
@pytest.mark.parametrize(
-    "model, expected_params", 
+    "model, expected_params",
    [
        ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", {"top_k": 2}),
-        ("bedrock/converse/us.amazon.nova-pro-v1:0", {'inferenceConfig': {"topK": 2}}),
+        ("bedrock/converse/us.amazon.nova-pro-v1:0", {"inferenceConfig": {"topK": 2}}),
        ("bedrock/meta.llama3-70b-instruct-v1:0", {}),
        ("bedrock/mistral.mistral-7b-instruct-v0:2", {}),
-
+    ],
    ]
 )
 def test_bedrock_top_k_param(model, expected_params):
    import json
@ -2611,42 +2622,39 @@ def test_bedrock_top_k_param(model, expected_params):
    with patch.object(client, "post") as mock_post:
        mock_response = Mock()
-        
+
-        if ("mistral" in model):
+        if "mistral" in model:
-            mock_response.text = json.dumps({"outputs": [{"text": "Here's a joke...", "stop_reason": "stop"}]}) 
+            mock_response.text = json.dumps(
                {"outputs": [{"text": "Here's a joke...", "stop_reason": "stop"}]}
            )
        else:
            mock_response.text = json.dumps(
                {
                    "output": {
                        "message": {
                            "role": "assistant",
-                            "content": [
+                            "content": [{"text": "Here's a joke..."}],
                                {
                                    "text": "Here's a joke..."
                                }
                            ]
                        }
                    },
                    "usage": {"inputTokens": 12, "outputTokens": 6, "totalTokens": 18},
-                    "stopReason": "stop"
+                    "stopReason": "stop",
                }
-            ) 
+            )
-            
+
        mock_response.status_code = 200
        # Add required response attributes
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json = lambda: json.loads(mock_response.text)
        mock_post.return_value = mock_response
        litellm.completion(
            model=model,
            messages=[{"role": "user", "content": "Hello, world!"}],
            top_k=2,
-            client=client
+            client=client,
-        )  
+        )
        data = json.loads(mock_post.call_args.kwargs["data"])
-        if ("mistral" in model):
+        if "mistral" in model:
-            assert (data["top_k"] == 2)
+            assert data["top_k"] == 2
        else:
-            assert (data["additionalModelRequestFields"] == expected_params)
+            assert data["additionalModelRequestFields"] == expected_params
--- a/tests/llm_translation/test_max_completion_tokens.py
+++ b/tests/llm_translation/test_max_completion_tokens.py
@ -298,7 +298,7 @@ def test_all_model_configs():
        drop_params=False,
    ) == {"max_tokens": 10}
-    from litellm.llms.bedrock.common_utils import (
+    from litellm import (
        AmazonAnthropicClaude3Config,
        AmazonAnthropicConfig,
    )
--- a/tests/llm_translation/test_unit_test_bedrock_invoke.py
+++ b/tests/llm_translation/test_unit_test_bedrock_invoke.py
@ -0,0 +1,214 @@
 import os
 import sys
 import traceback
 from dotenv import load_dotenv
 import litellm.types
 import pytest
 from litellm import AmazonInvokeConfig
 import json
 load_dotenv()
 import io
 import os
 sys.path.insert(0, os.path.abspath("../.."))
 from unittest.mock import AsyncMock, Mock, patch
 # Initialize the transformer
@pytest.fixture
 def bedrock_transformer():
    return AmazonInvokeConfig()
 def test_get_complete_url_basic(bedrock_transformer):
    """Test basic URL construction for non-streaming request"""
    url = bedrock_transformer.get_complete_url(
        api_base="https://bedrock-runtime.us-east-1.amazonaws.com",
        model="anthropic.claude-v2",
        optional_params={},
        stream=False,
    )
    assert (
        url
        == "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke"
    )
 def test_get_complete_url_streaming(bedrock_transformer):
    """Test URL construction for streaming request"""
    url = bedrock_transformer.get_complete_url(
        api_base="https://bedrock-runtime.us-east-1.amazonaws.com",
        model="anthropic.claude-v2",
        optional_params={},
        stream=True,
    )
    assert (
        url
        == "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke-with-response-stream"
    )
 def test_transform_request_invalid_provider(bedrock_transformer):
    """Test request transformation with invalid provider"""
    messages = [{"role": "user", "content": "Hello"}]
    with pytest.raises(Exception) as exc_info:
        bedrock_transformer.transform_request(
            model="invalid.model",
            messages=messages,
            optional_params={},
            litellm_params={},
            headers={},
        )
    assert "Unknown provider" in str(exc_info.value)
@patch("botocore.auth.SigV4Auth")
@patch("botocore.awsrequest.AWSRequest")
 def test_sign_request_basic(mock_aws_request, mock_sigv4_auth, bedrock_transformer):
    """Test basic request signing without extra headers"""
    # Mock credentials
    mock_credentials = Mock()
    bedrock_transformer.get_credentials = Mock(return_value=mock_credentials)
    # Setup mock SigV4Auth instance
    mock_auth_instance = Mock()
    mock_sigv4_auth.return_value = mock_auth_instance
    # Setup mock AWSRequest instance
    mock_request = Mock()
    mock_request.headers = {
        "Authorization": "AWS4-HMAC-SHA256 Credential=...",
        "X-Amz-Date": "20240101T000000Z",
        "Content-Type": "application/json",
    }
    mock_aws_request.return_value = mock_request
    # Test parameters
    headers = {}
    optional_params = {"aws_region_name": "us-east-1"}
    request_data = {"prompt": "Hello"}
    api_base = "https://bedrock-runtime.us-east-1.amazonaws.com"
    # Call the method
    result = bedrock_transformer.sign_request(
        headers=headers,
        optional_params=optional_params,
        request_data=request_data,
        api_base=api_base,
    )
    # Verify the results
    mock_sigv4_auth.assert_called_once_with(mock_credentials, "bedrock", "us-east-1")
    mock_aws_request.assert_called_once_with(
        method="POST",
        url=api_base,
        data='{"prompt": "Hello"}',
        headers={"Content-Type": "application/json"},
    )
    mock_auth_instance.add_auth.assert_called_once_with(mock_request)
    assert result == mock_request.headers
 def test_transform_request_cohere_command(bedrock_transformer):
    """Test request transformation for Cohere Command model"""
    messages = [{"role": "user", "content": "Hello"}]
    result = bedrock_transformer.transform_request(
        model="cohere.command-r",
        messages=messages,
        optional_params={"max_tokens": 2048},
        litellm_params={},
        headers={},
    )
    print(
        "transformed request for invoke cohere command=", json.dumps(result, indent=4)
    )
    expected_result = {"message": "Hello", "max_tokens": 2048, "chat_history": []}
    assert result == expected_result
 def test_transform_request_ai21(bedrock_transformer):
    """Test request transformation for AI21"""
    messages = [{"role": "user", "content": "Hello"}]
    result = bedrock_transformer.transform_request(
        model="ai21.j2-ultra",
        messages=messages,
        optional_params={"max_tokens": 2048},
        litellm_params={},
        headers={},
    )
    print("transformed request for invoke ai21=", json.dumps(result, indent=4))
    expected_result = {
        "prompt": "Hello",
        "max_tokens": 2048,
    }
    assert result == expected_result
 def test_transform_request_mistral(bedrock_transformer):
    """Test request transformation for Mistral"""
    messages = [{"role": "user", "content": "Hello"}]
    result = bedrock_transformer.transform_request(
        model="mistral.mistral-7b",
        messages=messages,
        optional_params={"max_tokens": 2048},
        litellm_params={},
        headers={},
    )
    print("transformed request for invoke mistral=", json.dumps(result, indent=4))
    expected_result = {
        "prompt": "<s>[INST] Hello [/INST]\n",
        "max_tokens": 2048,
    }
    assert result == expected_result
 def test_transform_request_amazon_titan(bedrock_transformer):
    """Test request transformation for Amazon Titan"""
    messages = [{"role": "user", "content": "Hello"}]
    result = bedrock_transformer.transform_request(
        model="amazon.titan-text-express-v1",
        messages=messages,
        optional_params={"maxTokenCount": 2048},
        litellm_params={},
        headers={},
    )
    print("transformed request for invoke amazon titan=", json.dumps(result, indent=4))
    expected_result = {
        "inputText": "\n\nUser: Hello\n\nBot: ",
        "textGenerationConfig": {
            "maxTokenCount": 2048,
        },
    }
    assert result == expected_result
 def test_transform_request_meta_llama(bedrock_transformer):
    """Test request transformation for Meta/Llama"""
    messages = [{"role": "user", "content": "Hello"}]
    result = bedrock_transformer.transform_request(
        model="meta.llama2-70b",
        messages=messages,
        optional_params={"max_gen_len": 2048},
        litellm_params={},
        headers={},
    )
    print("transformed request for invoke meta llama=", json.dumps(result, indent=4))
    expected_result = {"prompt": "Hello", "max_gen_len": 2048}
    assert result == expected_result
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@ -765,6 +765,7 @@ async def test_async_chat_vertex_ai_stream():
@pytest.mark.asyncio
@pytest.mark.skip(reason="temp-skip to see what else is failing")
 async def test_async_text_completion_bedrock():
    try:
        customHandler = CompletionCustomHandler()