Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 11:14:04 +00:00
Bedrock Embeddings refactor + model support (#5462)
* refactor(bedrock): initial commit to refactor bedrock to a folder
  Improve code readability + maintainability
* refactor: more refactor work
* fix: fix imports
* feat(bedrock/embeddings.py): support translating embedding into amazon embedding formats
* fix: fix linting errors
* test: skip test on end of life model
* fix(cohere/embed.py): fix linting error
* fix(cohere/embed.py): fix typing
* fix(cohere/embed.py): fix post-call logging for cohere embedding call
* test(test_embeddings.py): fix error message assertion in test
This commit is contained in: parent 6fb82aaf75, commit 37f9705d6e
21 changed files with 1946 additions and 1659 deletions
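For orientation, here is a minimal sketch of how the refactored Bedrock embedding path is exercised through the public litellm API. The model name, region, and inputs below are illustrative assumptions, not values taken from this diff:

import litellm

# Illustrative call; aws_region_name can also come from the AWS_REGION_NAME / AWS_REGION
# env vars, falling back to "us-west-2" (see _load_credentials in the file below).
response = litellm.embedding(
    model="bedrock/amazon.titan-embed-text-v2:0",  # one of the Titan models handled by the new code
    input=["hello world", "good morning"],
    aws_region_name="us-west-2",
)
print(len(response.data), response.usage.prompt_tokens)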
litellm/llms/bedrock/embed/embedding.py (498 additions, new file)
@@ -0,0 +1,498 @@
"""
Handles embedding calls to Bedrock's `/invoke` endpoint
"""

import copy
import json
import os
from copy import deepcopy
from typing import Any, Callable, List, Literal, Optional, Tuple, Union

import httpx

import litellm
from litellm import get_secret
from litellm.llms.cohere.embed import embedding as cohere_embedding
from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
    _get_httpx_client,
)
from litellm.types.llms.bedrock import AmazonEmbeddingRequest, CohereEmbeddingRequest
from litellm.types.utils import Embedding, EmbeddingResponse, Usage

from ...base_aws_llm import BaseAWSLLM
from ..common_utils import BedrockError, get_runtime_endpoint
from .amazon_titan_g1_transformation import AmazonTitanG1Config
from .amazon_titan_multimodal_transformation import (
    _transform_request as amazon_multimodal_transform_request,
)
from .amazon_titan_multimodal_transformation import (
    _transform_response as amazon_multimodal_transform_response,
)
from .amazon_titan_v2_transformation import AmazonTitanV2Config
from .cohere_transformation import _transform_request as cohere_transform_request


class BedrockEmbedding(BaseAWSLLM):
    def _load_credentials(
        self,
        optional_params: dict,
    ) -> Tuple[Any, str]:
        try:
            from botocore.credentials import Credentials
        except ImportError as e:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
        ## CREDENTIALS ##
        # pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
        aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
        aws_access_key_id = optional_params.pop("aws_access_key_id", None)
        aws_session_token = optional_params.pop("aws_session_token", None)
        aws_region_name = optional_params.pop("aws_region_name", None)
        aws_role_name = optional_params.pop("aws_role_name", None)
        aws_session_name = optional_params.pop("aws_session_name", None)
        aws_profile_name = optional_params.pop("aws_profile_name", None)
        aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
        aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)

        ### SET REGION NAME ###
        if aws_region_name is None:
            # check env #
            litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)

            if litellm_aws_region_name is not None and isinstance(
                litellm_aws_region_name, str
            ):
                aws_region_name = litellm_aws_region_name

            standard_aws_region_name = get_secret("AWS_REGION", None)
            if standard_aws_region_name is not None and isinstance(
                standard_aws_region_name, str
            ):
                aws_region_name = standard_aws_region_name

            if aws_region_name is None:
                aws_region_name = "us-west-2"

        credentials: Credentials = self.get_credentials(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=aws_session_token,
            aws_region_name=aws_region_name,
            aws_session_name=aws_session_name,
            aws_profile_name=aws_profile_name,
            aws_role_name=aws_role_name,
            aws_web_identity_token=aws_web_identity_token,
            aws_sts_endpoint=aws_sts_endpoint,
        )
        return credentials, aws_region_name

    async def async_embeddings(self):
        pass

    def _make_sync_call(
        self,
        client: Optional[HTTPHandler],
        timeout: Optional[Union[float, httpx.Timeout]],
        api_base: str,
        headers: dict,
        data: dict,
    ) -> dict:
        if client is None or not isinstance(client, HTTPHandler):
            _params = {}
            if timeout is not None:
                if isinstance(timeout, float) or isinstance(timeout, int):
                    timeout = httpx.Timeout(timeout)
                _params["timeout"] = timeout
            client = _get_httpx_client(_params)  # type: ignore
        else:
            client = client
        try:
            response = client.post(url=api_base, headers=headers, data=json.dumps(data))  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise BedrockError(status_code=error_code, message=response.text)
        except httpx.TimeoutException:
            raise BedrockError(status_code=408, message="Timeout error occurred.")

        return response.json()

    def _single_func_embeddings(
        self,
        client: Optional[HTTPHandler],
        timeout: Optional[Union[float, httpx.Timeout]],
        batch_data: List[dict],
        credentials: Any,
        extra_headers: Optional[dict],
        endpoint_url: str,
        aws_region_name: str,
        model: str,
        logging_obj: Any,
    ):
        try:
            import boto3
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
            from botocore.credentials import Credentials
        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")

        responses: List[dict] = []
        for data in batch_data:
            sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
            headers = {"Content-Type": "application/json"}
            if extra_headers is not None:
                headers = {"Content-Type": "application/json", **extra_headers}
            request = AWSRequest(
                method="POST", url=endpoint_url, data=json.dumps(data), headers=headers
            )
            sigv4.add_auth(request)
            if (
                extra_headers is not None and "Authorization" in extra_headers
            ):  # prevent sigv4 from overwriting the auth header
                request.headers["Authorization"] = extra_headers["Authorization"]
            prepped = request.prepare()

            ## LOGGING
            logging_obj.pre_call(
                input=data,
                api_key="",
                additional_args={
                    "complete_input_dict": data,
                    "api_base": prepped.url,
                    "headers": prepped.headers,
                },
            )
            response = self._make_sync_call(
                client=client,
                timeout=timeout,
                api_base=prepped.url,
                headers=prepped.headers,
                data=data,
            )

            ## LOGGING
            logging_obj.post_call(
                input=data,
                api_key="",
                original_response=response,
                additional_args={"complete_input_dict": data},
            )

            responses.append(response)

        returned_response: Optional[EmbeddingResponse] = None

        ## TRANSFORM RESPONSE ##
        if model == "amazon.titan-embed-image-v1":
            returned_response = amazon_multimodal_transform_response(
                response_list=responses, model=model
            )
        elif model == "amazon.titan-embed-text-v1":
            returned_response = AmazonTitanG1Config()._transform_response(
                response_list=responses, model=model
            )
        elif model == "amazon.titan-embed-text-v2:0":
            returned_response = AmazonTitanV2Config()._transform_response(
                response_list=responses, model=model
            )

        if returned_response is None:
            raise Exception(
                "Unable to map model response to known provider format. model={}".format(
                    model
                )
            )

        return returned_response

    def embeddings(
        self,
        model: str,
        input: List[str],
        api_base: Optional[str],
        model_response: EmbeddingResponse,
        print_verbose: Callable,
        encoding,
        logging_obj,
        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]],
        timeout: Optional[Union[float, httpx.Timeout]],
        aembedding: Optional[bool],
        extra_headers: Optional[dict],
        optional_params=None,
        litellm_params=None,
    ) -> EmbeddingResponse:
        try:
            import boto3
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
            from botocore.credentials import Credentials
        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")

        credentials, aws_region_name = self._load_credentials(optional_params)

        ### TRANSFORMATION ###
        provider = model.split(".")[0]
        inference_params = copy.deepcopy(optional_params)
        inference_params.pop(
            "user", None
        )  # make sure user is not passed in for bedrock call
        modelId = (
            optional_params.pop("model_id", None) or model
        )  # default to model if not passed

        data: Optional[CohereEmbeddingRequest] = None
        batch_data: Optional[List] = None
        if provider == "cohere":
            data = cohere_transform_request(
                input=input, inference_params=inference_params
            )
        elif provider == "amazon" and model in [
            "amazon.titan-embed-image-v1",
            "amazon.titan-embed-text-v1",
            "amazon.titan-embed-text-v2:0",
        ]:
            batch_data = []
            for i in input:
                if model == "amazon.titan-embed-image-v1":
                    transformed_request: AmazonEmbeddingRequest = (
                        amazon_multimodal_transform_request(
                            input=i, inference_params=inference_params
                        )
                    )
                elif model == "amazon.titan-embed-text-v1":
                    transformed_request = AmazonTitanG1Config()._transform_request(
                        input=i, inference_params=inference_params
                    )
                elif model == "amazon.titan-embed-text-v2:0":
                    transformed_request = AmazonTitanV2Config()._transform_request(
                        input=i, inference_params=inference_params
                    )
                batch_data.append(transformed_request)

        ### SET RUNTIME ENDPOINT ###
        endpoint_url = get_runtime_endpoint(
            api_base=api_base,
            aws_bedrock_runtime_endpoint=optional_params.pop(
                "aws_bedrock_runtime_endpoint", None
            ),
            aws_region_name=aws_region_name,
        )
        endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"

        if batch_data is not None:
            return self._single_func_embeddings(
                client=(
                    client
                    if client is not None and isinstance(client, HTTPHandler)
                    else None
                ),
                timeout=timeout,
                batch_data=batch_data,
                credentials=credentials,
                extra_headers=extra_headers,
                endpoint_url=endpoint_url,
                aws_region_name=aws_region_name,
                model=model,
                logging_obj=logging_obj,
            )
        elif data is None:
            raise Exception("Unable to map request to provider")

        sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
        headers = {"Content-Type": "application/json"}
        if extra_headers is not None:
            headers = {"Content-Type": "application/json", **extra_headers}
        request = AWSRequest(
            method="POST", url=endpoint_url, data=json.dumps(data), headers=headers
        )
        sigv4.add_auth(request)
        if (
            extra_headers is not None and "Authorization" in extra_headers
        ):  # prevent sigv4 from overwriting the auth header
            request.headers["Authorization"] = extra_headers["Authorization"]
        prepped = request.prepare()

        ## ROUTING ##
        return cohere_embedding(
            model=model,
            input=input,
            model_response=model_response,
            logging_obj=logging_obj,
            optional_params=optional_params,
            encoding=encoding,
            data=data,  # type: ignore
            complete_api_base=prepped.url,
            api_key=None,
            aembedding=aembedding,
            timeout=timeout,
            client=client,
            headers=prepped.headers,
        )

# def _embedding_func_single(
#     model: str,
#     input: str,
#     client: Any,
#     optional_params=None,
#     encoding=None,
#     logging_obj=None,
# ):
#     if isinstance(input, str) is False:
#         raise BedrockError(
#             message="Bedrock Embedding API input must be type str | List[str]",
#             status_code=400,
#         )
#     # logic for parsing in - calling - parsing out model embedding calls
#     ## FORMAT EMBEDDING INPUT ##
#     provider = model.split(".")[0]
#     inference_params = copy.deepcopy(optional_params)
#     inference_params.pop(
#         "user", None
#     )  # make sure user is not passed in for bedrock call
#     modelId = (
#         optional_params.pop("model_id", None) or model
#     )  # default to model if not passed
#     if provider == "amazon":
#         input = input.replace(os.linesep, " ")
#         data = {"inputText": input, **inference_params}
#         # data = json.dumps(data)
#     elif provider == "cohere":
#         inference_params["input_type"] = inference_params.get(
#             "input_type", "search_document"
#         )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
#         data = {"texts": [input], **inference_params}  # type: ignore
#     body = json.dumps(data).encode("utf-8")  # type: ignore
#     ## LOGGING
#     request_str = f"""
#     response = client.invoke_model(
#         body={body},
#         modelId={modelId},
#         accept="*/*",
#         contentType="application/json",
#     )"""  # type: ignore
#     logging_obj.pre_call(
#         input=input,
#         api_key="",  # boto3 is used for init.
#         additional_args={
#             "complete_input_dict": {"model": modelId, "texts": input},
#             "request_str": request_str,
#         },
#     )
#     try:
#         response = client.invoke_model(
#             body=body,
#             modelId=modelId,
#             accept="*/*",
#             contentType="application/json",
#         )
#         response_body = json.loads(response.get("body").read())
#         ## LOGGING
#         logging_obj.post_call(
#             input=input,
#             api_key="",
#             additional_args={"complete_input_dict": data},
#             original_response=json.dumps(response_body),
#         )
#         if provider == "cohere":
#             response = response_body.get("embeddings")
#             # flatten list
#             response = [item for sublist in response for item in sublist]
#             return response
#         elif provider == "amazon":
#             return response_body.get("embedding")
#     except Exception as e:
#         raise BedrockError(
#             message=f"Embedding Error with model {model}: {e}", status_code=500
#         )

# def embedding(
#     model: str,
#     input: Union[list, str],
#     model_response: litellm.EmbeddingResponse,
#     api_key: Optional[str] = None,
#     logging_obj=None,
#     optional_params=None,
#     encoding=None,
# ):
#     ### BOTO3 INIT ###
#     # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
#     aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
#     aws_access_key_id = optional_params.pop("aws_access_key_id", None)
#     aws_region_name = optional_params.pop("aws_region_name", None)
#     aws_role_name = optional_params.pop("aws_role_name", None)
#     aws_session_name = optional_params.pop("aws_session_name", None)
#     aws_bedrock_runtime_endpoint = optional_params.pop(
#         "aws_bedrock_runtime_endpoint", None
#     )
#     aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)

#     # use passed in BedrockRuntime.Client if provided, otherwise create a new one
#     client = init_bedrock_client(
#         aws_access_key_id=aws_access_key_id,
#         aws_secret_access_key=aws_secret_access_key,
#         aws_region_name=aws_region_name,
#         aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
#         aws_web_identity_token=aws_web_identity_token,
#         aws_role_name=aws_role_name,
#         aws_session_name=aws_session_name,
#     )
#     if isinstance(input, str):
#         ## Embedding Call
#         embeddings = [
#             _embedding_func_single(
#                 model,
#                 input,
#                 optional_params=optional_params,
#                 client=client,
#                 logging_obj=logging_obj,
#             )
#         ]
#     elif isinstance(input, list):
#         ## Embedding Call - assuming this is a List[str]
#         embeddings = [
#             _embedding_func_single(
#                 model,
#                 i,
#                 optional_params=optional_params,
#                 client=client,
#                 logging_obj=logging_obj,
#             )
#             for i in input
#         ]  # [TODO]: make these parallel calls
#     else:
#         # enters this branch if input = int, ex. input=2
#         raise BedrockError(
#             message="Bedrock Embedding API input must be type str | List[str]",
#             status_code=400,
#         )

#     ## Populate OpenAI compliant dictionary
#     embedding_response = []
#     for idx, embedding in enumerate(embeddings):
#         embedding_response.append(
#             {
#                 "object": "embedding",
#                 "index": idx,
#                 "embedding": embedding,
#             }
#         )
#     model_response.object = "list"
#     model_response.data = embedding_response
#     model_response.model = model
#     input_tokens = 0

#     input_str = "".join(input)

#     input_tokens += len(encoding.encode(input_str))

#     usage = Usage(
#         prompt_tokens=input_tokens,
#         completion_tokens=0,
#         total_tokens=input_tokens + 0,
#     )
#     model_response.usage = usage

#     return model_response
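Reading the new embeddings() entrypoint: Cohere models send the whole input list in a single `/invoke` request, while the supported Amazon Titan models are fanned out to one SigV4-signed `/invoke` call per input string, with the per-call responses merged by the matching provider config. A rough sketch of the per-provider payload shapes, inferred from the legacy inline logic kept above; the exact fields live in the *_transformation modules and may differ:

# Hypothetical illustration only - the real request bodies come from the
# *_transformation helpers imported at the top of this file.
inputs = ["hello world", "good morning"]

# amazon.titan-embed-text-v1 style: one request per input string (see _single_func_embeddings)
titan_requests = [{"inputText": text} for text in inputs]

# cohere.embed-* style: a single request carrying every input
cohere_request = {"texts": inputs, "input_type": "search_document"}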