From 478307d4cf5f21bd7bf9350a4e60b297243f55ad Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 4 Mar 2024 17:15:35 -0800 Subject: [PATCH] fix(bedrock.py): support anthropic messages api on bedrock (claude-3) --- litellm/llms/bedrock.py | 73 ++- litellm/tests/test_amazing_s3_logs.py | 2 +- litellm/tests/test_bedrock_completion.py | 557 +++++++++--------- litellm/tests/test_caching.py | 1 - litellm/tests/test_completion.py | 4 - litellm/tests/test_completion_cost.py | 1 - litellm/tests/test_custom_callback_input.py | 3 - litellm/tests/test_embedding.py | 2 - litellm/tests/test_image_generation.py | 2 - .../tests/test_provider_specific_config.py | 1 - litellm/tests/test_proxy_server.py | 1 - litellm/tests/test_router.py | 1 - litellm/tests/test_router_timeout.py | 1 - litellm/tests/test_streaming.py | 2 - litellm/utils.py | 37 +- 15 files changed, 381 insertions(+), 307 deletions(-) diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 18920da4a0..a2d8accdfb 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -5,7 +5,13 @@ import time from typing import Callable, Optional, Any, Union, List import litellm from litellm.utils import ModelResponse, get_secret, Usage, ImageResponse -from .prompt_templates.factory import prompt_factory, custom_prompt +from .prompt_templates.factory import ( + prompt_factory, + custom_prompt, + construct_tool_use_system_prompt, + extract_between_tags, + parse_xml_params, +) import httpx @@ -81,7 +87,7 @@ class AmazonAnthropicClaude3Config: """ max_tokens: Optional[int] = litellm.max_tokens - anthropic_version: Optional[str] = None + anthropic_version: Optional[str] = "bedrock-2023-05-31" def __init__( self, @@ -111,6 +117,15 @@ class AmazonAnthropicClaude3Config: and v is not None } + def get_supported_openai_params(self): + return ["max_tokens"] + + def map_openai_params(self, non_default_params: dict, optional_params: dict): + for param, value in non_default_params.items(): + if param == "max_tokens": + optional_params["max_tokens"] = value + return optional_params + class AmazonAnthropicConfig: """ @@ -165,6 +180,25 @@ class AmazonAnthropicConfig: and v is not None } + def get_supported_openai_params( + self, + ): + return ["max_tokens", "temperature", "stop", "top_p", "stream"] + + def map_openai_params(self, non_default_params: dict, optional_params: dict): + for param, value in non_default_params.items(): + if param == "max_tokens": + optional_params["max_tokens_to_sample"] = value + if param == "temperature": + optional_params["temperature"] = value + if param == "top_p": + optional_params["top_p"] = value + if param == "stop": + optional_params["stop_sequences"] = value + if param == "stream" and value == True: + optional_params["stream"] = value + return optional_params + class AmazonCohereConfig: """ @@ -664,7 +698,20 @@ def completion( inference_params = copy.deepcopy(optional_params) stream = inference_params.pop("stream", False) if provider == "anthropic": - if model == "anthropic.claude-3": + if model.startswith("anthropic.claude-3"): + # Separate system prompt from rest of message + system_prompt_idx: Optional[int] = None + for idx, message in enumerate(messages): + if message["role"] == "system": + inference_params["system"] = message["content"] + system_prompt_idx = idx + break + if system_prompt_idx is not None: + messages.pop(system_prompt_idx) + # Format rest of message according to anthropic guidelines + messages = prompt_factory( + model=model, messages=messages, custom_llm_provider="anthropic" + ) ## LOAD CONFIG config = litellm.AmazonAnthropicClaude3Config.get_config() for k, v in config.items(): @@ -672,7 +719,17 @@ def completion( k not in inference_params ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in inference_params[k] = v - data = json.dumps({"prompt": prompt, **inference_params}) + ## Handle Tool Calling + if "tools" in inference_params: + tool_calling_system_prompt = construct_tool_use_system_prompt( + tools=inference_params["tools"] + ) + inference_params["system"] = ( + inference_params.get("system", "\n") + + tool_calling_system_prompt + ) # add the anthropic tool calling prompt to the system prompt + inference_params.pop("tools") + data = json.dumps({"messages": messages, **inference_params}) else: ## LOAD CONFIG config = litellm.AmazonAnthropicConfig.get_config() @@ -838,8 +895,12 @@ def completion( if provider == "ai21": outputText = response_body.get("completions")[0].get("data").get("text") elif provider == "anthropic": - outputText = response_body["completion"] - model_response["finish_reason"] = response_body["stop_reason"] + if model.startswith("anthropic.claude-3"): + outputText = response_body.get("content")[0].get("text", None) + model_response["finish_reason"] = response_body["stop_reason"] + else: + outputText = response_body["completion"] + model_response["finish_reason"] = response_body["stop_reason"] elif provider == "cohere": outputText = response_body["generations"][0]["text"] elif provider == "meta": diff --git a/litellm/tests/test_amazing_s3_logs.py b/litellm/tests/test_amazing_s3_logs.py index 74d6eb5b94..0ccc0bc15c 100644 --- a/litellm/tests/test_amazing_s3_logs.py +++ b/litellm/tests/test_amazing_s3_logs.py @@ -1,4 +1,4 @@ -## @pytest.mark.skip(reason="AWS Suspended Account") +# # @pytest.mark.skip(reason="AWS Suspended Account") # import sys # import os # import io, asyncio diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index 3e3d8b6bbc..6843815086 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -1,293 +1,310 @@ # @pytest.mark.skip(reason="AWS Suspended Account") -# import sys, os -# import traceback -# from dotenv import load_dotenv -# -# load_dotenv() -# import os, io -# -# sys.path.insert( -# 0, os.path.abspath("../..") -# ) # Adds the parent directory to the system path -# import pytest -# import litellm -# from litellm import embedding, completion, completion_cost, Timeout, ModelResponse -# from litellm import RateLimitError -# -# # litellm.num_retries = 3 -# litellm.cache = None -# litellm.success_callback = [] -# user_message = "Write a short poem about the sky" -# messages = [{"content": user_message, "role": "user"}] -# -# -# @pytest.fixture(autouse=True) -# def reset_callbacks(): -# print("\npytest fixture - resetting callbacks") -# litellm.success_callback = [] -# litellm._async_success_callback = [] -# litellm.failure_callback = [] -# litellm.callbacks = [] +import sys, os +import traceback +from dotenv import load_dotenv + +load_dotenv() +import os, io + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest +import litellm +from litellm import embedding, completion, completion_cost, Timeout, ModelResponse +from litellm import RateLimitError + +# litellm.num_retries = 3 +litellm.cache = None +litellm.success_callback = [] +user_message = "Write a short poem about the sky" +messages = [{"content": user_message, "role": "user"}] -# def test_completion_bedrock_claude_completion_auth(): -# print("calling bedrock claude completion params auth") -# import os - -# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] -# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] -# aws_region_name = os.environ["AWS_REGION_NAME"] - -# os.environ.pop("AWS_ACCESS_KEY_ID", None) -# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) -# os.environ.pop("AWS_REGION_NAME", None) - -# try: -# response = completion( -# model="bedrock/anthropic.claude-instant-v1", -# messages=messages, -# max_tokens=10, -# temperature=0.1, -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# aws_region_name=aws_region_name, -# ) -# # Add any assertions here to check the response -# print(response) - -# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id -# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key -# os.environ["AWS_REGION_NAME"] = aws_region_name -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") +@pytest.fixture(autouse=True) +def reset_callbacks(): + print("\npytest fixture - resetting callbacks") + litellm.success_callback = [] + litellm._async_success_callback = [] + litellm.failure_callback = [] + litellm.callbacks = [] -# # test_completion_bedrock_claude_completion_auth() +def test_completion_bedrock_claude_completion_auth(): + print("calling bedrock claude completion params auth") + import os + + aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] + aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] + aws_region_name = os.environ["AWS_REGION_NAME"] + + os.environ.pop("AWS_ACCESS_KEY_ID", None) + os.environ.pop("AWS_SECRET_ACCESS_KEY", None) + os.environ.pop("AWS_REGION_NAME", None) + + try: + response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_region_name=aws_region_name, + ) + # Add any assertions here to check the response + print(response) + + os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id + os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key + os.environ["AWS_REGION_NAME"] = aws_region_name + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") -# def test_completion_bedrock_claude_2_1_completion_auth(): -# print("calling bedrock claude 2.1 completion params auth") -# import os - -# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] -# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] -# aws_region_name = os.environ["AWS_REGION_NAME"] - -# os.environ.pop("AWS_ACCESS_KEY_ID", None) -# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) -# os.environ.pop("AWS_REGION_NAME", None) -# try: -# response = completion( -# model="bedrock/anthropic.claude-v2:1", -# messages=messages, -# max_tokens=10, -# temperature=0.1, -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# aws_region_name=aws_region_name, -# ) -# # Add any assertions here to check the response -# print(response) - -# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id -# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key -# os.environ["AWS_REGION_NAME"] = aws_region_name -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_claude_completion_auth() -# # test_completion_bedrock_claude_2_1_completion_auth() +def test_completion_bedrock_claude_2_1_completion_auth(): + print("calling bedrock claude 2.1 completion params auth") + import os + + aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] + aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] + aws_region_name = os.environ["AWS_REGION_NAME"] + + os.environ.pop("AWS_ACCESS_KEY_ID", None) + os.environ.pop("AWS_SECRET_ACCESS_KEY", None) + os.environ.pop("AWS_REGION_NAME", None) + try: + response = completion( + model="bedrock/anthropic.claude-v2:1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_region_name=aws_region_name, + ) + # Add any assertions here to check the response + print(response) + + os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id + os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key + os.environ["AWS_REGION_NAME"] = aws_region_name + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") -# def test_completion_bedrock_claude_external_client_auth(): -# print("\ncalling bedrock claude external client auth") -# import os - -# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] -# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] -# aws_region_name = os.environ["AWS_REGION_NAME"] - -# os.environ.pop("AWS_ACCESS_KEY_ID", None) -# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) -# os.environ.pop("AWS_REGION_NAME", None) - -# try: -# import boto3 - -# litellm.set_verbose = True - -# bedrock = boto3.client( -# service_name="bedrock-runtime", -# region_name=aws_region_name, -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com", -# ) - -# response = completion( -# model="bedrock/anthropic.claude-instant-v1", -# messages=messages, -# max_tokens=10, -# temperature=0.1, -# aws_bedrock_client=bedrock, -# ) -# # Add any assertions here to check the response -# print(response) - -# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id -# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key -# os.environ["AWS_REGION_NAME"] = aws_region_name -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_claude_2_1_completion_auth() -# # test_completion_bedrock_claude_external_client_auth() +def test_completion_bedrock_claude_external_client_auth(): + print("\ncalling bedrock claude external client auth") + import os + + aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] + aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] + aws_region_name = os.environ["AWS_REGION_NAME"] + + os.environ.pop("AWS_ACCESS_KEY_ID", None) + os.environ.pop("AWS_SECRET_ACCESS_KEY", None) + os.environ.pop("AWS_REGION_NAME", None) + + try: + import boto3 + + litellm.set_verbose = True + + bedrock = boto3.client( + service_name="bedrock-runtime", + region_name=aws_region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com", + ) + + response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_bedrock_client=bedrock, + ) + # Add any assertions here to check the response + print(response) + + os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id + os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key + os.environ["AWS_REGION_NAME"] = aws_region_name + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") -# @pytest.mark.skip(reason="Expired token, need to renew") -# def test_completion_bedrock_claude_sts_client_auth(): -# print("\ncalling bedrock claude external client auth") -# import os - -# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"] -# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"] -# aws_region_name = os.environ["AWS_REGION_NAME"] -# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] - -# try: -# import boto3 - -# litellm.set_verbose = True - -# response = completion( -# model="bedrock/anthropic.claude-instant-v1", -# messages=messages, -# max_tokens=10, -# temperature=0.1, -# aws_region_name=aws_region_name, -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# aws_role_name=aws_role_name, -# aws_session_name="my-test-session", -# ) - -# response = embedding( -# model="cohere.embed-multilingual-v3", -# input=["hello world"], -# aws_region_name="us-east-1", -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# aws_role_name=aws_role_name, -# aws_session_name="my-test-session", -# ) - -# response = completion( -# model="gpt-3.5-turbo", -# messages=messages, -# aws_region_name="us-east-1", -# aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key, -# aws_role_name=aws_role_name, -# aws_session_name="my-test-session", -# ) -# # Add any assertions here to check the response -# print(response) -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_claude_external_client_auth() -# # test_completion_bedrock_claude_sts_client_auth() +@pytest.mark.skip(reason="Expired token, need to renew") +def test_completion_bedrock_claude_sts_client_auth(): + print("\ncalling bedrock claude external client auth") + import os + + aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"] + aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"] + aws_region_name = os.environ["AWS_REGION_NAME"] + aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] + + try: + import boto3 + + litellm.set_verbose = True + + response = completion( + model="bedrock/anthropic.claude-instant-v1", + messages=messages, + max_tokens=10, + temperature=0.1, + aws_region_name=aws_region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_role_name=aws_role_name, + aws_session_name="my-test-session", + ) + + response = embedding( + model="cohere.embed-multilingual-v3", + input=["hello world"], + aws_region_name="us-east-1", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_role_name=aws_role_name, + aws_session_name="my-test-session", + ) + + response = completion( + model="gpt-3.5-turbo", + messages=messages, + aws_region_name="us-east-1", + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_role_name=aws_role_name, + aws_session_name="my-test-session", + ) + # Add any assertions here to check the response + print(response) + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") -# def test_provisioned_throughput(): -# try: -# litellm.set_verbose = True -# import botocore, json, io -# import botocore.session -# from botocore.stub import Stubber - -# bedrock_client = botocore.session.get_session().create_client( -# "bedrock-runtime", region_name="us-east-1" -# ) - -# expected_params = { -# "accept": "application/json", -# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", ' -# '"max_tokens_to_sample": 256}', -# "contentType": "application/json", -# "modelId": "provisioned-model-arn", -# } -# response_from_bedrock = { -# "body": io.StringIO( -# json.dumps( -# { -# "completion": " Here is a short poem about the sky:", -# "stop_reason": "max_tokens", -# "stop": None, -# } -# ) -# ), -# "contentType": "contentType", -# "ResponseMetadata": {"HTTPStatusCode": 200}, -# } - -# with Stubber(bedrock_client) as stubber: -# stubber.add_response( -# "invoke_model", -# service_response=response_from_bedrock, -# expected_params=expected_params, -# ) -# response = litellm.completion( -# model="bedrock/anthropic.claude-instant-v1", -# model_id="provisioned-model-arn", -# messages=[{"content": "Hello, how are you?", "role": "user"}], -# aws_bedrock_client=bedrock_client, -# ) -# print("response stubbed", response) -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") +# test_completion_bedrock_claude_sts_client_auth() -# # test_provisioned_throughput() +def test_bedrock_claude_3(): + try: + litellm.set_verbose = True + response: ModelResponse = completion( + model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + messages=messages, + max_tokens=10, + ) + # Add any assertions here to check the response + assert len(response.choices) > 0 + assert len(response.choices[0].message.content) > 0 + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") -# def test_completion_bedrock_mistral_completion_auth(): -# print("calling bedrock mistral completion params auth") -# import os -# -# # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] -# # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] -# # aws_region_name = os.environ["AWS_REGION_NAME"] -# -# # os.environ.pop("AWS_ACCESS_KEY_ID", None) -# # os.environ.pop("AWS_SECRET_ACCESS_KEY", None) -# # os.environ.pop("AWS_REGION_NAME", None) -# try: -# response:ModelResponse = completion( -# model="bedrock/mistral.mistral-7b-instruct-v0:2", -# messages=messages, -# max_tokens=10, -# temperature=0.1, -# ) -# # Add any assertions here to check the response -# assert len(response.choices) > 0 -# assert len(response.choices[0].message.content) > 0 -# -# # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id -# # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key -# # os.environ["AWS_REGION_NAME"] = aws_region_name -# except RateLimitError: -# pass -# except Exception as e: -# pytest.fail(f"Error occurred: {e}") -# -# -# test_completion_bedrock_mistral_completion_auth() \ No newline at end of file +def test_provisioned_throughput(): + try: + litellm.set_verbose = True + import botocore, json, io + import botocore.session + from botocore.stub import Stubber + + bedrock_client = botocore.session.get_session().create_client( + "bedrock-runtime", region_name="us-east-1" + ) + + expected_params = { + "accept": "application/json", + "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", ' + '"max_tokens_to_sample": 256}', + "contentType": "application/json", + "modelId": "provisioned-model-arn", + } + response_from_bedrock = { + "body": io.StringIO( + json.dumps( + { + "completion": " Here is a short poem about the sky:", + "stop_reason": "max_tokens", + "stop": None, + } + ) + ), + "contentType": "contentType", + "ResponseMetadata": {"HTTPStatusCode": 200}, + } + + with Stubber(bedrock_client) as stubber: + stubber.add_response( + "invoke_model", + service_response=response_from_bedrock, + expected_params=expected_params, + ) + response = litellm.completion( + model="bedrock/anthropic.claude-instant-v1", + model_id="provisioned-model-arn", + messages=[{"content": "Hello, how are you?", "role": "user"}], + aws_bedrock_client=bedrock_client, + ) + print("response stubbed", response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_provisioned_throughput() + + +def test_completion_bedrock_mistral_completion_auth(): + print("calling bedrock mistral completion params auth") + import os + + # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] + # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] + # aws_region_name = os.environ["AWS_REGION_NAME"] + + # os.environ.pop("AWS_ACCESS_KEY_ID", None) + # os.environ.pop("AWS_SECRET_ACCESS_KEY", None) + # os.environ.pop("AWS_REGION_NAME", None) + try: + response: ModelResponse = completion( + model="bedrock/mistral.mistral-7b-instruct-v0:2", + messages=messages, + max_tokens=10, + temperature=0.1, + ) + # Add any assertions here to check the response + assert len(response.choices) > 0 + assert len(response.choices[0].message.content) > 0 + + # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id + # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key + # os.environ["AWS_REGION_NAME"] = aws_region_name + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_bedrock_mistral_completion_auth() diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py index 3a7f969e5c..f649bff027 100644 --- a/litellm/tests/test_caching.py +++ b/litellm/tests/test_caching.py @@ -546,7 +546,6 @@ def test_redis_cache_acompletion_stream(): # test_redis_cache_acompletion_stream() -@pytest.mark.skip(reason="AWS Suspended Account") def test_redis_cache_acompletion_stream_bedrock(): import asyncio diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 1677e04cfd..36ca7b8b03 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1648,7 +1648,6 @@ def test_completion_chat_sagemaker_mistral(): # test_completion_chat_sagemaker_mistral() -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_titan_null_response(): try: response = completion( @@ -1674,7 +1673,6 @@ def test_completion_bedrock_titan_null_response(): pytest.fail(f"An error occurred - {str(e)}") -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_titan(): try: response = completion( @@ -1696,7 +1694,6 @@ def test_completion_bedrock_titan(): # test_completion_bedrock_titan() -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_claude(): print("calling claude") try: @@ -1718,7 +1715,6 @@ def test_completion_bedrock_claude(): # test_completion_bedrock_claude() -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_cohere(): print("calling bedrock cohere") litellm.set_verbose = True diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index 034048c633..947da71669 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -171,7 +171,6 @@ def test_cost_openai_image_gen(): assert cost == 0.019922944 -@pytest.mark.skip(reason="AWS Suspended Account") def test_cost_bedrock_pricing(): """ - get pricing specific to region for a model diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index 683173b21e..9249333197 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -478,7 +478,6 @@ async def test_async_chat_azure_stream(): ## Test Bedrock + sync -@pytest.mark.skip(reason="AWS Suspended Account") def test_chat_bedrock_stream(): try: customHandler = CompletionCustomHandler() @@ -519,7 +518,6 @@ def test_chat_bedrock_stream(): ## Test Bedrock + Async -@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_async_chat_bedrock_stream(): try: @@ -796,7 +794,6 @@ async def test_async_embedding_azure(): ## Test Bedrock + Async -@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_async_embedding_bedrock(): try: diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 2c9de496c4..a2f71eb982 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -256,7 +256,6 @@ async def test_vertexai_aembedding(): pytest.fail(f"Error occurred: {e}") -@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_embedding_titan(): try: # this tests if we support str input for bedrock embedding @@ -302,7 +301,6 @@ def test_bedrock_embedding_titan(): # test_bedrock_embedding_titan() -@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_embedding_cohere(): try: litellm.set_verbose = False diff --git a/litellm/tests/test_image_generation.py b/litellm/tests/test_image_generation.py index 0672319a21..59ccaacd8d 100644 --- a/litellm/tests/test_image_generation.py +++ b/litellm/tests/test_image_generation.py @@ -121,7 +121,6 @@ async def test_async_image_generation_azure(): pytest.fail(f"An exception occurred - {str(e)}") -@pytest.mark.skip(reason="AWS Suspended Account") def test_image_generation_bedrock(): try: litellm.set_verbose = True @@ -142,7 +141,6 @@ def test_image_generation_bedrock(): pytest.fail(f"An exception occurred - {str(e)}") -@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_aimage_generation_bedrock_with_optional_params(): try: diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py index dcb4dcb4c7..08a84b5604 100644 --- a/litellm/tests/test_provider_specific_config.py +++ b/litellm/tests/test_provider_specific_config.py @@ -515,7 +515,6 @@ def sagemaker_test_completion(): # Bedrock -@pytest.mark.skip(reason="AWS Suspended Account") def bedrock_test_completion(): litellm.AmazonCohereConfig(max_tokens=10) # litellm.set_verbose=True diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index 3db4a980a9..d5e8f09c68 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -125,7 +125,6 @@ def test_embedding(client_no_auth): pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}") -@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_embedding(client_no_auth): global headers from litellm.proxy.proxy_server import user_custom_auth diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 7c182ee686..dc2076aa36 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -575,7 +575,6 @@ def test_azure_embedding_on_router(): # test_azure_embedding_on_router() -@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_on_router(): litellm.set_verbose = True print("\n Testing bedrock on router\n") diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py index 3816c649e9..dff30113be 100644 --- a/litellm/tests/test_router_timeout.py +++ b/litellm/tests/test_router_timeout.py @@ -87,7 +87,6 @@ def test_router_timeouts(): print("********** TOKENS USED SO FAR = ", total_tokens_used) -@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_router_timeouts_bedrock(): import openai diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 5767a944b2..679413f3e8 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -764,7 +764,6 @@ def test_completion_replicate_stream_bad_key(): # test_completion_replicate_stream_bad_key() -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_claude_stream(): try: litellm.set_verbose = False @@ -811,7 +810,6 @@ def test_completion_bedrock_claude_stream(): # test_completion_bedrock_claude_stream() -@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_ai21_stream(): try: litellm.set_verbose = False diff --git a/litellm/utils.py b/litellm/utils.py index 1aa1d37673..8393ea64c4 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4513,20 +4513,24 @@ def get_optional_params( if stream: optional_params["stream"] = stream elif "anthropic" in model: - supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"] + supported_params = get_mapped_model_params( + model=model, custom_llm_provider=custom_llm_provider + ) _check_valid_arg(supported_params=supported_params) # anthropic params on bedrock # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}" - if max_tokens is not None: - optional_params["max_tokens_to_sample"] = max_tokens - if temperature is not None: - optional_params["temperature"] = temperature - if top_p is not None: - optional_params["top_p"] = top_p - if stop is not None: - optional_params["stop_sequences"] = stop - if stream: - optional_params["stream"] = stream + if model.startswith("anthropic.claude-3"): + optional_params = ( + litellm.AmazonAnthropicClaude3Config.map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + ) + ) + else: + optional_params = litellm.AmazonAnthropicConfig.map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + ) elif "amazon" in model: # amazon titan llms supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"] _check_valid_arg(supported_params=supported_params) @@ -4991,6 +4995,17 @@ def get_optional_params( return optional_params +def get_mapped_model_params(model: str, custom_llm_provider: str): + """ + Returns the supported openai params for a given model + provider + """ + if custom_llm_provider == "bedrock": + if model.startswith("anthropic.claude-3"): + return litellm.AmazonAnthropicClaude3Config().get_supported_openai_params() + else: + return litellm.AmazonAnthropicConfig().get_supported_openai_params() + + def get_llm_provider( model: str, custom_llm_provider: Optional[str] = None,