From 478307d4cf5f21bd7bf9350a4e60b297243f55ad Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 4 Mar 2024 17:15:35 -0800
Subject: [PATCH] fix(bedrock.py): support anthropic messages api on bedrock
 (claude-3)

---
 litellm/llms/bedrock.py                       |  73 ++-
 litellm/tests/test_amazing_s3_logs.py         |   2 +-
 litellm/tests/test_bedrock_completion.py      | 557 +++++++++---------
 litellm/tests/test_caching.py                 |   1 -
 litellm/tests/test_completion.py              |   4 -
 litellm/tests/test_completion_cost.py         |   1 -
 litellm/tests/test_custom_callback_input.py   |   3 -
 litellm/tests/test_embedding.py               |   2 -
 litellm/tests/test_image_generation.py        |   2 -
 .../tests/test_provider_specific_config.py    |   1 -
 litellm/tests/test_proxy_server.py            |   1 -
 litellm/tests/test_router.py                  |   1 -
 litellm/tests/test_router_timeout.py          |   1 -
 litellm/tests/test_streaming.py               |   2 -
 litellm/utils.py                              |  37 +-
 15 files changed, 381 insertions(+), 307 deletions(-)

diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index 18920da4a0..a2d8accdfb 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -5,7 +5,13 @@ import time
 from typing import Callable, Optional, Any, Union, List
 import litellm
 from litellm.utils import ModelResponse, get_secret, Usage, ImageResponse
-from .prompt_templates.factory import prompt_factory, custom_prompt
+from .prompt_templates.factory import (
+    prompt_factory,
+    custom_prompt,
+    construct_tool_use_system_prompt,
+    extract_between_tags,
+    parse_xml_params,
+)
 import httpx
 
 
@@ -81,7 +87,7 @@ class AmazonAnthropicClaude3Config:
     """
 
     max_tokens: Optional[int] = litellm.max_tokens
-    anthropic_version: Optional[str] = None
+    anthropic_version: Optional[str] = "bedrock-2023-05-31"
 
     def __init__(
         self,
@@ -111,6 +117,15 @@ class AmazonAnthropicClaude3Config:
             and v is not None
         }
 
+    def get_supported_openai_params(self):
+        return ["max_tokens"]  # the only OpenAI-style param map_openai_params forwards for this config
+
+    def map_openai_params(self, non_default_params: dict, optional_params: dict):
+        # Forward `max_tokens` under the same key; every other param is left out.
+        if "max_tokens" in non_default_params:
+            optional_params["max_tokens"] = non_default_params["max_tokens"]
+        return optional_params
+
 
 class AmazonAnthropicConfig:
     """
@@ -165,6 +180,25 @@ class AmazonAnthropicConfig:
             and v is not None
         }
 
+    def get_supported_openai_params(self):
+        # OpenAI-style param names that map_openai_params of this config translates.
+        supported = ["max_tokens", "temperature", "stop", "top_p", "stream"]
+        return supported
+
+    def map_openai_params(self, non_default_params: dict, optional_params: dict):
+        """Copy supported OpenAI params into `optional_params`, renaming max_tokens and stop."""
+        for name, val in non_default_params.items():
+            if name == "max_tokens":
+                optional_params["max_tokens_to_sample"] = val
+            elif name in ("temperature", "top_p"):  # key is the same on both sides
+                optional_params[name] = val
+            elif name == "stop":
+                optional_params["stop_sequences"] = val
+            elif name == "stream" and val == True:
+                # `stream` is forwarded only when it compares equal to True
+                optional_params["stream"] = val
+        return optional_params
+
 
 class AmazonCohereConfig:
     """
@@ -664,7 +698,20 @@ def completion(
         inference_params = copy.deepcopy(optional_params)
         stream = inference_params.pop("stream", False)
         if provider == "anthropic":
-            if model == "anthropic.claude-3":
+            if model.startswith("anthropic.claude-3"):
+                # Separate system prompt from rest of message (without mutating the caller's list)
+                system_prompt_idx: Optional[int] = None
+                for idx, message in enumerate(messages):
+                    if message["role"] == "system":
+                        inference_params["system"] = message["content"]
+                        system_prompt_idx = idx
+                        break
+                if system_prompt_idx is not None:
+                    messages = messages[:system_prompt_idx] + messages[system_prompt_idx + 1 :]
+                # Format rest of message according to anthropic guidelines
+                messages = prompt_factory(
+                    model=model, messages=messages, custom_llm_provider="anthropic"
+                )
                 ## LOAD CONFIG
                 config = litellm.AmazonAnthropicClaude3Config.get_config()
                 for k, v in config.items():
@@ -672,7 +719,17 @@ def completion(
                         k not in inference_params
                     ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                         inference_params[k] = v
-                data = json.dumps({"prompt": prompt, **inference_params})
+                ## Handle Tool Calling
+                if "tools" in inference_params:
+                    tool_calling_system_prompt = construct_tool_use_system_prompt(
+                        tools=inference_params["tools"]
+                    )
+                    inference_params["system"] = (
+                        inference_params.get("system", "\n")
+                        + tool_calling_system_prompt
+                    )  # add the anthropic tool calling prompt to the system prompt
+                    inference_params.pop("tools")
+                data = json.dumps({"messages": messages, **inference_params})
             else:
                 ## LOAD CONFIG
                 config = litellm.AmazonAnthropicConfig.get_config()
@@ -838,8 +895,12 @@ def completion(
         if provider == "ai21":
             outputText = response_body.get("completions")[0].get("data").get("text")
         elif provider == "anthropic":
-            outputText = response_body["completion"]
-            model_response["finish_reason"] = response_body["stop_reason"]
+            if model.startswith("anthropic.claude-3"):
+                outputText = response_body.get("content")[0].get("text", None)
+                model_response["finish_reason"] = response_body["stop_reason"]
+            else:
+                outputText = response_body["completion"]
+                model_response["finish_reason"] = response_body["stop_reason"]
         elif provider == "cohere":
             outputText = response_body["generations"][0]["text"]
         elif provider == "meta":
diff --git a/litellm/tests/test_amazing_s3_logs.py b/litellm/tests/test_amazing_s3_logs.py
index 74d6eb5b94..0ccc0bc15c 100644
--- a/litellm/tests/test_amazing_s3_logs.py
+++ b/litellm/tests/test_amazing_s3_logs.py
@@ -1,4 +1,4 @@
-## @pytest.mark.skip(reason="AWS Suspended Account")
+# # @pytest.mark.skip(reason="AWS Suspended Account")
 # import sys
 # import os
 # import io, asyncio
diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py
index 3e3d8b6bbc..6843815086 100644
--- a/litellm/tests/test_bedrock_completion.py
+++ b/litellm/tests/test_bedrock_completion.py
@@ -1,293 +1,310 @@
 # @pytest.mark.skip(reason="AWS Suspended Account")
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
-#
-# load_dotenv()
-# import os, io
-#
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import pytest
-# import litellm
-# from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
-# from litellm import RateLimitError
-#
-# # litellm.num_retries = 3
-# litellm.cache = None
-# litellm.success_callback = []
-# user_message = "Write a short poem about the sky"
-# messages = [{"content": user_message, "role": "user"}]
-#
-#
-# @pytest.fixture(autouse=True)
-# def reset_callbacks():
-#     print("\npytest fixture - resetting callbacks")
-#     litellm.success_callback = []
-#     litellm._async_success_callback = []
-#     litellm.failure_callback = []
-#     litellm.callbacks = []
+import sys, os
+import traceback
+from dotenv import load_dotenv
+
+load_dotenv()
+import os, io
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+import litellm
+from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
+from litellm import RateLimitError
+
+# litellm.num_retries = 3
+litellm.cache = None
+litellm.success_callback = []
+user_message = "Write a short poem about the sky"
+messages = [{"content": user_message, "role": "user"}]
 
 
-# def test_completion_bedrock_claude_completion_auth():
-#     print("calling bedrock claude completion params auth")
-#     import os
-
-#     aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
-#     aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
-#     aws_region_name = os.environ["AWS_REGION_NAME"]
-
-#     os.environ.pop("AWS_ACCESS_KEY_ID", None)
-#     os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
-#     os.environ.pop("AWS_REGION_NAME", None)
-
-#     try:
-#         response = completion(
-#             model="bedrock/anthropic.claude-instant-v1",
-#             messages=messages,
-#             max_tokens=10,
-#             temperature=0.1,
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             aws_region_name=aws_region_name,
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-
-#         os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
-#         os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
-#         os.environ["AWS_REGION_NAME"] = aws_region_name
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+@pytest.fixture(autouse=True)
+def reset_callbacks():  # autouse fixture: empties litellm's callback lists for the tests in this module
+    print("\npytest fixture - resetting callbacks")
+    litellm.success_callback = []
+    litellm._async_success_callback = []
+    litellm.failure_callback = []
+    litellm.callbacks = []
 
 
-# # test_completion_bedrock_claude_completion_auth()
+def test_completion_bedrock_claude_completion_auth():
+    """Pass AWS credentials as completion() kwargs while the AWS_* env vars are unset."""
+    print("calling bedrock claude completion params auth")
+
+    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
+    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
+    aws_region_name = os.environ["AWS_REGION_NAME"]
+
+    os.environ.pop("AWS_ACCESS_KEY_ID", None)
+    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
+    os.environ.pop("AWS_REGION_NAME", None)
+
+    try:
+        response = completion(
+            model="bedrock/anthropic.claude-instant-v1",
+            messages=messages,
+            max_tokens=10,
+            temperature=0.1,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_region_name=aws_region_name,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+    finally:  # restore even on failure so later tests still find the AWS_* variables
+        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
+        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
+        os.environ["AWS_REGION_NAME"] = aws_region_name
 
 
-# def test_completion_bedrock_claude_2_1_completion_auth():
-#     print("calling bedrock claude 2.1 completion params auth")
-#     import os
-
-#     aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
-#     aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
-#     aws_region_name = os.environ["AWS_REGION_NAME"]
-
-#     os.environ.pop("AWS_ACCESS_KEY_ID", None)
-#     os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
-#     os.environ.pop("AWS_REGION_NAME", None)
-#     try:
-#         response = completion(
-#             model="bedrock/anthropic.claude-v2:1",
-#             messages=messages,
-#             max_tokens=10,
-#             temperature=0.1,
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             aws_region_name=aws_region_name,
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-
-#         os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
-#         os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
-#         os.environ["AWS_REGION_NAME"] = aws_region_name
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+# test_completion_bedrock_claude_completion_auth()
 
 
-# # test_completion_bedrock_claude_2_1_completion_auth()
+def test_completion_bedrock_claude_2_1_completion_auth():
+    """Same kwargs-credentials check as above, against the claude-v2:1 model id."""
+    print("calling bedrock claude 2.1 completion params auth")
+
+    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
+    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
+    aws_region_name = os.environ["AWS_REGION_NAME"]
+
+    os.environ.pop("AWS_ACCESS_KEY_ID", None)
+    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
+    os.environ.pop("AWS_REGION_NAME", None)
+    try:
+        response = completion(
+            model="bedrock/anthropic.claude-v2:1",
+            messages=messages,
+            max_tokens=10,
+            temperature=0.1,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_region_name=aws_region_name,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+    finally:  # restore even on failure so later tests still find the AWS_* variables
+        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
+        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
+        os.environ["AWS_REGION_NAME"] = aws_region_name
 
 
-# def test_completion_bedrock_claude_external_client_auth():
-#     print("\ncalling bedrock claude external client auth")
-#     import os
-
-#     aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
-#     aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
-#     aws_region_name = os.environ["AWS_REGION_NAME"]
-
-#     os.environ.pop("AWS_ACCESS_KEY_ID", None)
-#     os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
-#     os.environ.pop("AWS_REGION_NAME", None)
-
-#     try:
-#         import boto3
-
-#         litellm.set_verbose = True
-
-#         bedrock = boto3.client(
-#             service_name="bedrock-runtime",
-#             region_name=aws_region_name,
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
-#         )
-
-#         response = completion(
-#             model="bedrock/anthropic.claude-instant-v1",
-#             messages=messages,
-#             max_tokens=10,
-#             temperature=0.1,
-#             aws_bedrock_client=bedrock,
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-
-#         os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
-#         os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
-#         os.environ["AWS_REGION_NAME"] = aws_region_name
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+# test_completion_bedrock_claude_2_1_completion_auth()
 
 
-# # test_completion_bedrock_claude_external_client_auth()
+def test_completion_bedrock_claude_external_client_auth():
+    """Pass a pre-built boto3 bedrock-runtime client while the AWS_* env vars are unset."""
+    print("\ncalling bedrock claude external client auth")
+
+    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
+    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
+    aws_region_name = os.environ["AWS_REGION_NAME"]
+
+    os.environ.pop("AWS_ACCESS_KEY_ID", None)
+    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
+    os.environ.pop("AWS_REGION_NAME", None)
+
+    try:
+        import boto3
+
+        litellm.set_verbose = True
+
+        bedrock = boto3.client(
+            service_name="bedrock-runtime",
+            region_name=aws_region_name,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
+        )
+
+        response = completion(
+            model="bedrock/anthropic.claude-instant-v1",
+            messages=messages,
+            max_tokens=10,
+            temperature=0.1,
+            aws_bedrock_client=bedrock,
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+    finally:  # restore even on failure so later tests still find the AWS_* variables
+        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
+        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
+        os.environ["AWS_REGION_NAME"] = aws_region_name
 
 
-# @pytest.mark.skip(reason="Expired token, need to renew")
-# def test_completion_bedrock_claude_sts_client_auth():
-#     print("\ncalling bedrock claude external client auth")
-#     import os
-
-#     aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
-#     aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
-#     aws_region_name = os.environ["AWS_REGION_NAME"]
-#     aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
-
-#     try:
-#         import boto3
-
-#         litellm.set_verbose = True
-
-#         response = completion(
-#             model="bedrock/anthropic.claude-instant-v1",
-#             messages=messages,
-#             max_tokens=10,
-#             temperature=0.1,
-#             aws_region_name=aws_region_name,
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             aws_role_name=aws_role_name,
-#             aws_session_name="my-test-session",
-#         )
-
-#         response = embedding(
-#             model="cohere.embed-multilingual-v3",
-#             input=["hello world"],
-#             aws_region_name="us-east-1",
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             aws_role_name=aws_role_name,
-#             aws_session_name="my-test-session",
-#         )
-
-#         response = completion(
-#             model="gpt-3.5-turbo",
-#             messages=messages,
-#             aws_region_name="us-east-1",
-#             aws_access_key_id=aws_access_key_id,
-#             aws_secret_access_key=aws_secret_access_key,
-#             aws_role_name=aws_role_name,
-#             aws_session_name="my-test-session",
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+# test_completion_bedrock_claude_external_client_auth()
 
 
-# # test_completion_bedrock_claude_sts_client_auth()
+@pytest.mark.skip(reason="Expired token, need to renew")
+def test_completion_bedrock_claude_sts_client_auth():
+    """Pass temporary keys plus aws_role_name/aws_session_name to completion and embedding."""
+    print("\ncalling bedrock claude sts client auth")
+
+    aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
+    aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
+    aws_region_name = os.environ["AWS_REGION_NAME"]
+    aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
+
+    try:
+        import boto3
+
+        litellm.set_verbose = True
+
+        response = completion(
+            model="bedrock/anthropic.claude-instant-v1",
+            messages=messages,
+            max_tokens=10,
+            temperature=0.1,
+            aws_region_name=aws_region_name,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_role_name=aws_role_name,
+            aws_session_name="my-test-session",
+        )
+
+        response = embedding(
+            model="cohere.embed-multilingual-v3",
+            input=["hello world"],
+            aws_region_name="us-east-1",
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_role_name=aws_role_name,
+            aws_session_name="my-test-session",
+        )
+
+        response = completion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            aws_region_name="us-east-1",
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_role_name=aws_role_name,
+            aws_session_name="my-test-session",
+        )
+        # Only the last response (the gpt-3.5-turbo call) is printed.
+        print(response)
+    except RateLimitError:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 
-# def test_provisioned_throughput():
-#     try:
-#         litellm.set_verbose = True
-#         import botocore, json, io
-#         import botocore.session
-#         from botocore.stub import Stubber
-
-#         bedrock_client = botocore.session.get_session().create_client(
-#             "bedrock-runtime", region_name="us-east-1"
-#         )
-
-#         expected_params = {
-#             "accept": "application/json",
-#             "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
-#             '"max_tokens_to_sample": 256}',
-#             "contentType": "application/json",
-#             "modelId": "provisioned-model-arn",
-#         }
-#         response_from_bedrock = {
-#             "body": io.StringIO(
-#                 json.dumps(
-#                     {
-#                         "completion": " Here is a short poem about the sky:",
-#                         "stop_reason": "max_tokens",
-#                         "stop": None,
-#                     }
-#                 )
-#             ),
-#             "contentType": "contentType",
-#             "ResponseMetadata": {"HTTPStatusCode": 200},
-#         }
-
-#         with Stubber(bedrock_client) as stubber:
-#             stubber.add_response(
-#                 "invoke_model",
-#                 service_response=response_from_bedrock,
-#                 expected_params=expected_params,
-#             )
-#             response = litellm.completion(
-#                 model="bedrock/anthropic.claude-instant-v1",
-#                 model_id="provisioned-model-arn",
-#                 messages=[{"content": "Hello, how are you?", "role": "user"}],
-#                 aws_bedrock_client=bedrock_client,
-#             )
-#             print("response stubbed", response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+# test_completion_bedrock_claude_sts_client_auth()
 
 
-# # test_provisioned_throughput()
+def test_bedrock_claude_3():  # calls completion() with a bedrock/anthropic.claude-3-* model id
+    try:
+        litellm.set_verbose = True
+        response: ModelResponse = completion(
+            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=messages,
+            max_tokens=10,
+        )
+        # The call must return at least one choice whose message content is non-empty.
+        assert len(response.choices) > 0
+        assert len(response.choices[0].message.content) > 0
+    except RateLimitError:  # being rate limited is tolerated, not treated as a failure
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 
-# def test_completion_bedrock_mistral_completion_auth():
-#     print("calling bedrock mistral completion params auth")
-#     import os
-#
-#     # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
-#     # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
-#     # aws_region_name = os.environ["AWS_REGION_NAME"]
-#
-#     # os.environ.pop("AWS_ACCESS_KEY_ID", None)
-#     # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
-#     # os.environ.pop("AWS_REGION_NAME", None)
-#     try:
-#         response:ModelResponse = completion(
-#             model="bedrock/mistral.mistral-7b-instruct-v0:2",
-#             messages=messages,
-#             max_tokens=10,
-#             temperature=0.1,
-#         )
-#         # Add any assertions here to check the response
-#         assert len(response.choices) > 0
-#         assert len(response.choices[0].message.content) > 0
-#
-#         # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
-#         # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
-#         # os.environ["AWS_REGION_NAME"] = aws_region_name
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-#
-#
-# test_completion_bedrock_mistral_completion_auth()
\ No newline at end of file
+def test_provisioned_throughput():  # offline: the bedrock client is stubbed with botocore's Stubber
+    try:
+        litellm.set_verbose = True
+        import botocore, json, io
+        import botocore.session
+        from botocore.stub import Stubber
+
+        bedrock_client = botocore.session.get_session().create_client(
+            "bedrock-runtime", region_name="us-east-1"
+        )
+
+        expected_params = {  # the invoke_model arguments registered with the stub below
+            "accept": "application/json",
+            "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
+            '"max_tokens_to_sample": 256}',
+            "contentType": "application/json",
+            "modelId": "provisioned-model-arn",  # the model_id kwarg, not the model name
+        }
+        response_from_bedrock = {  # canned service response handed to the stub
+            "body": io.StringIO(
+                json.dumps(
+                    {
+                        "completion": " Here is a short poem about the sky:",
+                        "stop_reason": "max_tokens",
+                        "stop": None,
+                    }
+                )
+            ),
+            "contentType": "contentType",
+            "ResponseMetadata": {"HTTPStatusCode": 200},
+        }
+
+        with Stubber(bedrock_client) as stubber:
+            stubber.add_response(
+                "invoke_model",
+                service_response=response_from_bedrock,
+                expected_params=expected_params,
+            )
+            response = litellm.completion(
+                model="bedrock/anthropic.claude-instant-v1",
+                model_id="provisioned-model-arn",
+                messages=[{"content": "Hello, how are you?", "role": "user"}],
+                aws_bedrock_client=bedrock_client,
+            )
+            print("response stubbed", response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+# test_provisioned_throughput()
+
+
+def test_completion_bedrock_mistral_completion_auth():
+    print("calling bedrock mistral completion params auth")
+    import os
+
+    # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
+    # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
+    # aws_region_name = os.environ["AWS_REGION_NAME"]
+
+    # os.environ.pop("AWS_ACCESS_KEY_ID", None)
+    # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
+    # os.environ.pop("AWS_REGION_NAME", None)
+    try:
+        response: ModelResponse = completion(
+            model="bedrock/mistral.mistral-7b-instruct-v0:2",
+            messages=messages,
+            max_tokens=10,
+            temperature=0.1,
+        )
+        # The call must return at least one choice whose message content is non-empty.
+        assert len(response.choices) > 0
+        assert len(response.choices[0].message.content) > 0
+
+        # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
+        # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
+        # os.environ["AWS_REGION_NAME"] = aws_region_name
+    except RateLimitError:  # being rate limited is tolerated, not treated as a failure
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+# test_completion_bedrock_mistral_completion_auth()
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 3a7f969e5c..f649bff027 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -546,7 +546,6 @@ def test_redis_cache_acompletion_stream():
 # test_redis_cache_acompletion_stream()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_redis_cache_acompletion_stream_bedrock():
     import asyncio
 
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 1677e04cfd..36ca7b8b03 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1648,7 +1648,6 @@ def test_completion_chat_sagemaker_mistral():
 # test_completion_chat_sagemaker_mistral()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_titan_null_response():
     try:
         response = completion(
@@ -1674,7 +1673,6 @@ def test_completion_bedrock_titan_null_response():
         pytest.fail(f"An error occurred - {str(e)}")
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_titan():
     try:
         response = completion(
@@ -1696,7 +1694,6 @@ def test_completion_bedrock_titan():
 # test_completion_bedrock_titan()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_claude():
     print("calling claude")
     try:
@@ -1718,7 +1715,6 @@ def test_completion_bedrock_claude():
 # test_completion_bedrock_claude()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_cohere():
     print("calling bedrock cohere")
     litellm.set_verbose = True
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index 034048c633..947da71669 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -171,7 +171,6 @@ def test_cost_openai_image_gen():
     assert cost == 0.019922944
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_cost_bedrock_pricing():
     """
     - get pricing specific to region for a model
diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py
index 683173b21e..9249333197 100644
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -478,7 +478,6 @@ async def test_async_chat_azure_stream():
 
 
 ## Test Bedrock + sync
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_chat_bedrock_stream():
     try:
         customHandler = CompletionCustomHandler()
@@ -519,7 +518,6 @@ def test_chat_bedrock_stream():
 
 
 ## Test Bedrock + Async
-@pytest.mark.skip(reason="AWS Suspended Account")
 @pytest.mark.asyncio
 async def test_async_chat_bedrock_stream():
     try:
@@ -796,7 +794,6 @@ async def test_async_embedding_azure():
 
 
 ## Test Bedrock + Async
-@pytest.mark.skip(reason="AWS Suspended Account")
 @pytest.mark.asyncio
 async def test_async_embedding_bedrock():
     try:
diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 2c9de496c4..a2f71eb982 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -256,7 +256,6 @@ async def test_vertexai_aembedding():
         pytest.fail(f"Error occurred: {e}")
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_bedrock_embedding_titan():
     try:
         # this tests if we support str input for bedrock embedding
@@ -302,7 +301,6 @@ def test_bedrock_embedding_titan():
 # test_bedrock_embedding_titan()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_bedrock_embedding_cohere():
     try:
         litellm.set_verbose = False
diff --git a/litellm/tests/test_image_generation.py b/litellm/tests/test_image_generation.py
index 0672319a21..59ccaacd8d 100644
--- a/litellm/tests/test_image_generation.py
+++ b/litellm/tests/test_image_generation.py
@@ -121,7 +121,6 @@ async def test_async_image_generation_azure():
             pytest.fail(f"An exception occurred - {str(e)}")
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_image_generation_bedrock():
     try:
         litellm.set_verbose = True
@@ -142,7 +141,6 @@ def test_image_generation_bedrock():
             pytest.fail(f"An exception occurred - {str(e)}")
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 @pytest.mark.asyncio
 async def test_aimage_generation_bedrock_with_optional_params():
     try:
diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py
index dcb4dcb4c7..08a84b5604 100644
--- a/litellm/tests/test_provider_specific_config.py
+++ b/litellm/tests/test_provider_specific_config.py
@@ -515,7 +515,6 @@ def sagemaker_test_completion():
 #  Bedrock
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def bedrock_test_completion():
     litellm.AmazonCohereConfig(max_tokens=10)
     # litellm.set_verbose=True
diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index 3db4a980a9..d5e8f09c68 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -125,7 +125,6 @@ def test_embedding(client_no_auth):
         pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_bedrock_embedding(client_no_auth):
     global headers
     from litellm.proxy.proxy_server import user_custom_auth
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 7c182ee686..dc2076aa36 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -575,7 +575,6 @@ def test_azure_embedding_on_router():
 # test_azure_embedding_on_router()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_bedrock_on_router():
     litellm.set_verbose = True
     print("\n Testing bedrock on router\n")
diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
index 3816c649e9..dff30113be 100644
--- a/litellm/tests/test_router_timeout.py
+++ b/litellm/tests/test_router_timeout.py
@@ -87,7 +87,6 @@ def test_router_timeouts():
         print("********** TOKENS USED SO FAR = ", total_tokens_used)
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 @pytest.mark.asyncio
 async def test_router_timeouts_bedrock():
     import openai
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 5767a944b2..679413f3e8 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -764,7 +764,6 @@ def test_completion_replicate_stream_bad_key():
 # test_completion_replicate_stream_bad_key()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_claude_stream():
     try:
         litellm.set_verbose = False
@@ -811,7 +810,6 @@ def test_completion_bedrock_claude_stream():
 # test_completion_bedrock_claude_stream()
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_bedrock_ai21_stream():
     try:
         litellm.set_verbose = False
diff --git a/litellm/utils.py b/litellm/utils.py
index 1aa1d37673..8393ea64c4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4513,20 +4513,24 @@ def get_optional_params(
             if stream:
                 optional_params["stream"] = stream
         elif "anthropic" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
+            supported_params = get_mapped_model_params(
+                model=model, custom_llm_provider=custom_llm_provider
+            )
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
-            if max_tokens is not None:
-                optional_params["max_tokens_to_sample"] = max_tokens
-            if temperature is not None:
-                optional_params["temperature"] = temperature
-            if top_p is not None:
-                optional_params["top_p"] = top_p
-            if stop is not None:
-                optional_params["stop_sequences"] = stop
-            if stream:
-                optional_params["stream"] = stream
+            if model.startswith("anthropic.claude-3"):
+                optional_params = (
+                    litellm.AmazonAnthropicClaude3Config.map_openai_params(
+                        non_default_params=non_default_params,
+                        optional_params=optional_params,
+                    )
+                )
+            else:
+                optional_params = litellm.AmazonAnthropicConfig.map_openai_params(
+                    non_default_params=non_default_params,
+                    optional_params=optional_params,
+                )
         elif "amazon" in model:  # amazon titan llms
             supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
@@ -4991,6 +4995,17 @@ def get_optional_params(
     return optional_params
 
 
+def get_mapped_model_params(model: str, custom_llm_provider: str):
+    """
+    Look up the supported openai params for a bedrock model; any other provider yields None
+    """
+    if custom_llm_provider != "bedrock":
+        return None
+    if model.startswith("anthropic.claude-3"):
+        return litellm.AmazonAnthropicClaude3Config().get_supported_openai_params()
+    return litellm.AmazonAnthropicConfig().get_supported_openai_params()
+
+
 def get_llm_provider(
     model: str,
     custom_llm_provider: Optional[str] = None,