From 8e7363acf5acfa47603ba88f614221684157b51f Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 15 Mar 2025 14:03:37 -0700 Subject: [PATCH] fix(converse_transformation.py): fix encoding model --- docs/my-website/docs/providers/bedrock.md | 676 ++++++++++-------- litellm/llms/bedrock/base_aws_llm.py | 20 +- litellm/llms/bedrock/chat/converse_handler.py | 44 +- .../test_bedrock_completion.py | 25 + 4 files changed, 441 insertions(+), 324 deletions(-) diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 1416006bf1..19b3728882 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -1262,308 +1262,6 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \ -## Boto3 - Authentication - -### Passing credentials as parameters - Completion() -Pass AWS credentials as parameters to litellm.completion -```python -import os -from litellm import completion - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - aws_access_key_id="", - aws_secret_access_key="", - aws_region_name="", -) -``` - -### Passing extra headers + Custom API Endpoints - -This can be used to override existing headers (e.g. `Authorization`) when calling custom api endpoints - - - - -```python -import os -import litellm -from litellm import completion - -litellm.set_verbose = True # 👈 SEE RAW REQUEST - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - aws_access_key_id="", - aws_secret_access_key="", - aws_region_name="", - aws_bedrock_runtime_endpoint="https://my-fake-endpoint.com", - extra_headers={"key": "value"} -) -``` - - - -1. 
Setup config.yaml - -```yaml -model_list: - - model_name: bedrock-model - litellm_params: - model: bedrock/anthropic.claude-instant-v1 - aws_access_key_id: "", - aws_secret_access_key: "", - aws_region_name: "", - aws_bedrock_runtime_endpoint: "https://my-fake-endpoint.com", - extra_headers: {"key": "value"} -``` - -2. Start proxy - -```bash -litellm --config /path/to/config.yaml --detailed_debug -``` - -3. Test it! - -```bash -curl -X POST 'http://0.0.0.0:4000/chat/completions' \ --H 'Content-Type: application/json' \ --H 'Authorization: Bearer sk-1234' \ --d '{ - "model": "bedrock-model", - "messages": [ - { - "role": "system", - "content": "You are a helpful math tutor. Guide the user through the solution step by step." - }, - { - "role": "user", - "content": "how can I solve 8x + 7 = -23" - } - ] -}' -``` - - - - -### SSO Login (AWS Profile) -- Set `AWS_PROFILE` environment variable -- Make bedrock completion call -```python -import os -from litellm import completion - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}] -) -``` - -or pass `aws_profile_name`: - -```python -import os -from litellm import completion - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - aws_profile_name="dev-profile", -) -``` - -### STS (Role-based Auth) - -- Set `aws_role_name` and `aws_session_name` - - -| LiteLLM Parameter | Boto3 Parameter | Description | Boto3 Documentation | -|------------------|-----------------|-------------|-------------------| -| `aws_access_key_id` | `aws_access_key_id` | AWS access key associated with an IAM user or role | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) | -| `aws_secret_access_key` | `aws_secret_access_key` | AWS secret key associated with the access key | 
[Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) | -| `aws_role_name` | `RoleArn` | The Amazon Resource Name (ARN) of the role to assume | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) | -| `aws_session_name` | `RoleSessionName` | An identifier for the assumed role session | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) | - - - -Make the bedrock completion call - - - - -```python -from litellm import completion - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_role_name=aws_role_name, - aws_session_name="my-test-session", - ) -``` - -If you also need to dynamically set the aws user accessing the role, add the additional args in the completion()/embedding() function - -```python -from litellm import completion - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_region_name=aws_region_name, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_role_name=aws_role_name, - aws_session_name="my-test-session", - ) -``` - - - - -```yaml -model_list: - - model_name: bedrock/* - litellm_params: - model: bedrock/* - aws_role_name: arn:aws:iam::888602223428:role/iam_local_role # AWS RoleArn - aws_session_name: "bedrock-session" # AWS RoleSessionName - aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # [OPTIONAL - not required if using role] - aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # [OPTIONAL - not required if using role] -``` - - - - - - - -### Passing an external BedrockRuntime.Client as a parameter - Completion() - -:::warning - -This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. 
Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues) - - -Experimental - 2024-Jun-23: - `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token` will be extracted from boto3.client and be passed into the httpx client - -::: - -Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth. - -Create a client from session credentials: -```python -import boto3 -from litellm import completion - -bedrock = boto3.client( - service_name="bedrock-runtime", - region_name="us-east-1", - aws_access_key_id="", - aws_secret_access_key="", - aws_session_token="", -) - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - aws_bedrock_client=bedrock, -) -``` - -Create a client from AWS profile in `~/.aws/config`: -```python -import boto3 -from litellm import completion - -dev_session = boto3.Session(profile_name="dev-profile") -bedrock = dev_session.client( - service_name="bedrock-runtime", - region_name="us-east-1", -) - -response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{ "content": "Hello, how are you?","role": "user"}], - aws_bedrock_client=bedrock, -) -``` -## Calling via Internal Proxy - -Use the `bedrock/converse_like/model` endpoint to call bedrock converse model via your internal proxy. - - - - -```python -from litellm import completion - -response = completion( - model="bedrock/converse_like/some-model", - messages=[{"role": "user", "content": "What's AWS?"}], - api_key="sk-1234", - api_base="https://some-api-url/models", - extra_headers={"test": "hello world"}, -) -``` - - - - -1. 
Setup config.yaml - -```yaml -model_list: - - model_name: anthropic-claude - litellm_params: - model: bedrock/converse_like/some-model - api_base: https://some-api-url/models -``` - -2. Start proxy server - -```bash -litellm --config config.yaml - -# RUNNING on http://0.0.0.0:4000 -``` - -3. Test it! - -```bash -curl -X POST 'http://0.0.0.0:4000/chat/completions' \ --H 'Content-Type: application/json' \ --H 'Authorization: Bearer sk-1234' \ --d '{ - "model": "anthropic-claude", - "messages": [ - { - "role": "system", - "content": "You are a helpful math tutor. Guide the user through the solution step by step." - }, - { "content": "Hello, how are you?", "role": "user" } - ] -}' -``` - - - - -**Expected Output URL** - -```bash -https://some-api-url/models -``` - ## Bedrock Imported Models (Deepseek, Deepseek R1) ### Deepseek R1 @@ -1958,3 +1656,377 @@ curl http://0.0.0.0:4000/rerank \ +## Bedrock Application Inference Profile + +Use Bedrock Application Inference Profile to track costs for projects on AWS. + +You can either pass it in the model name - `model="bedrock/arn:...` or as a separate `model_id="arn:..` param. + +### Set via `model_id` + + + + +```python +from litellm import completion +import os + +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + +response = completion( + model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", + messages=[{"role": "user", "content": "Hello, how are you?"}], + model_id="arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0", +) + +print(response) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: anthropic-claude-3-5-sonnet + litellm_params: + model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 + # You have to set the ARN application inference profile in the model_id parameter + model_id: arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0 +``` + +2. 
Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H "Authorization: Bearer $LITELLM_API_KEY" \ +-d '{ + "model": "anthropic-claude-3-5-sonnet", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "List 5 important events in the XIX century" + } + ] + } + ] +}' +``` + + + + +## Boto3 - Authentication + +### Passing credentials as parameters - Completion() +Pass AWS credentials as parameters to litellm.completion +```python +import os +from litellm import completion + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + aws_access_key_id="", + aws_secret_access_key="", + aws_region_name="", +) +``` + +### Passing extra headers + Custom API Endpoints + +This can be used to override existing headers (e.g. `Authorization`) when calling custom api endpoints + + + + +```python +import os +import litellm +from litellm import completion + +litellm.set_verbose = True # 👈 SEE RAW REQUEST + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + aws_access_key_id="", + aws_secret_access_key="", + aws_region_name="", + aws_bedrock_runtime_endpoint="https://my-fake-endpoint.com", + extra_headers={"key": "value"} +) +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: bedrock-model + litellm_params: + model: bedrock/anthropic.claude-instant-v1 + aws_access_key_id: "" + aws_secret_access_key: "" + aws_region_name: "" + aws_bedrock_runtime_endpoint: "https://my-fake-endpoint.com" + extra_headers: {"key": "value"} +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml --detailed_debug +``` + +3. Test it! 
+ +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "bedrock-model", + "messages": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." + }, + { + "role": "user", + "content": "how can I solve 8x + 7 = -23" + } + ] +}' +``` + + + + +### SSO Login (AWS Profile) +- Set `AWS_PROFILE` environment variable +- Make bedrock completion call +```python +import os +from litellm import completion + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}] +) +``` + +or pass `aws_profile_name`: + +```python +import os +from litellm import completion + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + aws_profile_name="dev-profile", +) +``` + +### STS (Role-based Auth) + +- Set `aws_role_name` and `aws_session_name` + + +| LiteLLM Parameter | Boto3 Parameter | Description | Boto3 Documentation | +|------------------|-----------------|-------------|-------------------| +| `aws_access_key_id` | `aws_access_key_id` | AWS access key associated with an IAM user or role | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) | +| `aws_secret_access_key` | `aws_secret_access_key` | AWS secret key associated with the access key | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) | +| `aws_role_name` | `RoleArn` | The Amazon Resource Name (ARN) of the role to assume | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) | +| `aws_session_name` | `RoleSessionName` | An identifier for the assumed role session | [AssumeRole 
API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) | + + + +Make the bedrock completion call + + + + +```python +from litellm import completion + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_role_name=aws_role_name, + aws_session_name="my-test-session", + ) +``` + +If you also need to dynamically set the aws user accessing the role, add the additional args in the completion()/embedding() function + +```python +from litellm import completion + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_region_name=aws_region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_role_name=aws_role_name, + aws_session_name="my-test-session", + ) +``` + + + + +```yaml +model_list: + - model_name: bedrock/* + litellm_params: + model: bedrock/* + aws_role_name: arn:aws:iam::888602223428:role/iam_local_role # AWS RoleArn + aws_session_name: "bedrock-session" # AWS RoleSessionName + aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # [OPTIONAL - not required if using role] + aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # [OPTIONAL - not required if using role] +``` + + + + + + + +### Passing an external BedrockRuntime.Client as a parameter - Completion() + +:::warning + +This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. 
[See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues) + + +Experimental - 2024-Jun-23: + `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token` will be extracted from boto3.client and be passed into the httpx client + +::: + +Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth. + +Create a client from session credentials: +```python +import boto3 +from litellm import completion + +bedrock = boto3.client( + service_name="bedrock-runtime", + region_name="us-east-1", + aws_access_key_id="", + aws_secret_access_key="", + aws_session_token="", +) + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + aws_bedrock_client=bedrock, +) +``` + +Create a client from AWS profile in `~/.aws/config`: +```python +import boto3 +from litellm import completion + +dev_session = boto3.Session(profile_name="dev-profile") +bedrock = dev_session.client( + service_name="bedrock-runtime", + region_name="us-east-1", +) + +response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + aws_bedrock_client=bedrock, +) +``` +## Calling via Internal Proxy (not bedrock url compatible) + +Use the `bedrock/converse_like/model` endpoint to call bedrock converse model via your internal proxy. + + + + +```python +from litellm import completion + +response = completion( + model="bedrock/converse_like/some-model", + messages=[{"role": "user", "content": "What's AWS?"}], + api_key="sk-1234", + api_base="https://some-api-url/models", + extra_headers={"test": "hello world"}, +) +``` + + + + +1. 
Setup config.yaml + +```yaml +model_list: + - model_name: anthropic-claude + litellm_params: + model: bedrock/converse_like/some-model + api_base: https://some-api-url/models +``` + +2. Start proxy server + +```bash +litellm --config config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "anthropic-claude", + "messages": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." + }, + { "content": "Hello, how are you?", "role": "user" } + ] +}' +``` + + + + +**Expected Output URL** + +```bash +https://some-api-url/models +``` diff --git a/litellm/llms/bedrock/base_aws_llm.py b/litellm/llms/bedrock/base_aws_llm.py index 86b47675d4..5482d80687 100644 --- a/litellm/llms/bedrock/base_aws_llm.py +++ b/litellm/llms/bedrock/base_aws_llm.py @@ -279,16 +279,30 @@ class BaseAWSLLM: return None def _get_aws_region_name( - self, optional_params: dict, model: Optional[str] = None + self, + optional_params: dict, + model: Optional[str] = None, + model_id: Optional[str] = None, ) -> str: """ - Get the AWS region name from the environment variables + Get the AWS region name from the environment variables. + + Parameters: + optional_params (dict): Optional parameters for the model call + model (str): The model name + model_id (str): The model ID. This is the ARN of the model, if passed in as a separate param. 
+ + Returns: + str: The AWS region name """ aws_region_name = optional_params.get("aws_region_name", None) ### SET REGION NAME ### if aws_region_name is None: # check model arn # - aws_region_name = self._get_aws_region_from_model_arn(model) + if model_id is not None: + aws_region_name = self._get_aws_region_from_model_arn(model_id) + else: + aws_region_name = self._get_aws_region_from_model_arn(model) # check env # litellm_aws_region_name = get_secret("AWS_REGION_NAME", None) diff --git a/litellm/llms/bedrock/chat/converse_handler.py b/litellm/llms/bedrock/chat/converse_handler.py index d45ab40c4b..c4c518cca5 100644 --- a/litellm/llms/bedrock/chat/converse_handler.py +++ b/litellm/llms/bedrock/chat/converse_handler.py @@ -268,23 +268,29 @@ class BedrockConverseLLM(BaseAWSLLM): ## SETUP ## stream = optional_params.pop("stream", None) - modelId = optional_params.pop("model_id", None) + unencoded_model_id = optional_params.pop("model_id", None) fake_stream = optional_params.pop("fake_stream", False) json_mode = optional_params.get("json_mode", False) - if modelId is not None: - modelId = self.encode_model_id(model_id=modelId) + if unencoded_model_id is not None: + modelId = self.encode_model_id(model_id=unencoded_model_id) else: modelId = self.encode_model_id(model_id=model) if stream is True and "ai21" in modelId: fake_stream = True + ### SET REGION NAME ### + aws_region_name = self._get_aws_region_name( + optional_params=optional_params, + model=model, + model_id=unencoded_model_id, + ) + ## CREDENTIALS ## # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them aws_secret_access_key = optional_params.pop("aws_secret_access_key", None) aws_access_key_id = optional_params.pop("aws_access_key_id", None) aws_session_token = optional_params.pop("aws_session_token", None) - aws_region_name = optional_params.pop("aws_region_name", None) aws_role_name = optional_params.pop("aws_role_name", None) aws_session_name = 
optional_params.pop("aws_session_name", None) aws_profile_name = optional_params.pop("aws_profile_name", None) @@ -293,25 +299,25 @@ class BedrockConverseLLM(BaseAWSLLM): ) # https://bedrock-runtime.{region_name}.amazonaws.com aws_web_identity_token = optional_params.pop("aws_web_identity_token", None) aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None) + optional_params.pop("aws_region_name", None) - ### SET REGION NAME ### - if aws_region_name is None: - # check env # - litellm_aws_region_name = get_secret("AWS_REGION_NAME", None) + # if aws_region_name is None: + # # check env # + # litellm_aws_region_name = get_secret("AWS_REGION_NAME", None) - if litellm_aws_region_name is not None and isinstance( - litellm_aws_region_name, str - ): - aws_region_name = litellm_aws_region_name + # if litellm_aws_region_name is not None and isinstance( + # litellm_aws_region_name, str + # ): + # aws_region_name = litellm_aws_region_name - standard_aws_region_name = get_secret("AWS_REGION", None) - if standard_aws_region_name is not None and isinstance( - standard_aws_region_name, str - ): - aws_region_name = standard_aws_region_name + # standard_aws_region_name = get_secret("AWS_REGION", None) + # if standard_aws_region_name is not None and isinstance( + # standard_aws_region_name, str + # ): + # aws_region_name = standard_aws_region_name - if aws_region_name is None: - aws_region_name = "us-west-2" + # if aws_region_name is None: + # aws_region_name = "us-west-2" litellm_params["aws_region_name"] = ( aws_region_name # [DO NOT DELETE] important for async calls diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py index 602992aee8..be6d31d511 100644 --- a/tests/llm_translation/test_bedrock_completion.py +++ b/tests/llm_translation/test_bedrock_completion.py @@ -2948,3 +2948,28 @@ async def test_bedrock_stream_thinking_content_openwebui(): assert ( len(response_content) > 0 ), "There should be non-empty 
content after thinking tags" + + +def test_bedrock_application_inference_profile(): + from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler + + client = HTTPHandler() + + with patch.object(client, "post") as mock_post: + try: + resp = completion( + model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0", + messages=[{"role": "user", "content": "Hello, how are you?"}], + model_id="arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0", + client=client, + ) + except Exception as e: + print(e) + + mock_post.assert_called_once() + + print(mock_post.call_args.kwargs) + json_data = mock_post.call_args.kwargs["data"] + assert mock_post.call_args.kwargs["url"].startswith( + "https://bedrock-runtime.eu-central-1.amazonaws.com/" + )