fix(bedrock.py): support anthropic messages api on bedrock (claude-3)

This commit is contained in:
Krrish Dholakia 2024-03-04 17:15:35 -08:00
parent 0ac652a771
commit 478307d4cf
15 changed files with 381 additions and 307 deletions

View file

@ -5,7 +5,13 @@ import time
from typing import Callable, Optional, Any, Union, List
import litellm
from litellm.utils import ModelResponse, get_secret, Usage, ImageResponse
from .prompt_templates.factory import prompt_factory, custom_prompt
from .prompt_templates.factory import (
prompt_factory,
custom_prompt,
construct_tool_use_system_prompt,
extract_between_tags,
parse_xml_params,
)
import httpx
@ -81,7 +87,7 @@ class AmazonAnthropicClaude3Config:
"""
max_tokens: Optional[int] = litellm.max_tokens
anthropic_version: Optional[str] = None
anthropic_version: Optional[str] = "bedrock-2023-05-31"
def __init__(
self,
@ -111,6 +117,15 @@ class AmazonAnthropicClaude3Config:
and v is not None
}
def get_supported_openai_params(self):
    """Return the list of OpenAI parameter names this config reports as supported."""
    supported_params = ["max_tokens"]
    return supported_params
def map_openai_params(self, non_default_params: dict, optional_params: dict):
    """Copy the handled OpenAI params from non_default_params into optional_params.

    Only "max_tokens" is handled, and it is stored under the same key.
    optional_params is updated in place and is also the return value.
    """
    if "max_tokens" in non_default_params:
        optional_params["max_tokens"] = non_default_params["max_tokens"]
    return optional_params
class AmazonAnthropicConfig:
"""
@ -165,6 +180,25 @@ class AmazonAnthropicConfig:
and v is not None
}
def get_supported_openai_params(self):
    """Return the list of OpenAI parameter names this config reports as supported."""
    supported_params = [
        "max_tokens",
        "temperature",
        "stop",
        "top_p",
        "stream",
    ]
    return supported_params
def map_openai_params(self, non_default_params: dict, optional_params: dict):
    """Translate OpenAI-style params into the keys written to optional_params.

    "max_tokens" is stored as "max_tokens_to_sample" and "stop" as
    "stop_sequences"; "temperature" and "top_p" keep their names. "stream" is
    copied only when its value compares equal to True. Any other key is
    ignored. optional_params is updated in place and is also the return value.
    """
    # Source key -> key used in optional_params.
    target_keys = {
        "max_tokens": "max_tokens_to_sample",
        "temperature": "temperature",
        "top_p": "top_p",
        "stop": "stop_sequences",
    }
    for name, setting in non_default_params.items():
        target_key = target_keys.get(name)
        if target_key is not None:
            optional_params[target_key] = setting
        elif name == "stream" and setting == True:
            optional_params["stream"] = setting
    return optional_params
class AmazonCohereConfig:
"""
@ -664,7 +698,20 @@ def completion(
inference_params = copy.deepcopy(optional_params)
stream = inference_params.pop("stream", False)
if provider == "anthropic":
if model == "anthropic.claude-3":
if model.startswith("anthropic.claude-3"):
# Separate system prompt from rest of message
system_prompt_idx: Optional[int] = None
for idx, message in enumerate(messages):
if message["role"] == "system":
inference_params["system"] = message["content"]
system_prompt_idx = idx
break
if system_prompt_idx is not None:
messages.pop(system_prompt_idx)
# Format rest of message according to anthropic guidelines
messages = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
## LOAD CONFIG
config = litellm.AmazonAnthropicClaude3Config.get_config()
for k, v in config.items():
@ -672,7 +719,17 @@ def completion(
k not in inference_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
inference_params[k] = v
data = json.dumps({"prompt": prompt, **inference_params})
## Handle Tool Calling
if "tools" in inference_params:
tool_calling_system_prompt = construct_tool_use_system_prompt(
tools=inference_params["tools"]
)
inference_params["system"] = (
inference_params.get("system", "\n")
+ tool_calling_system_prompt
) # add the anthropic tool calling prompt to the system prompt
inference_params.pop("tools")
data = json.dumps({"messages": messages, **inference_params})
else:
## LOAD CONFIG
config = litellm.AmazonAnthropicConfig.get_config()
@ -838,6 +895,10 @@ def completion(
if provider == "ai21":
outputText = response_body.get("completions")[0].get("data").get("text")
elif provider == "anthropic":
if model.startswith("anthropic.claude-3"):
outputText = response_body.get("content")[0].get("text", None)
model_response["finish_reason"] = response_body["stop_reason"]
else:
outputText = response_body["completion"]
model_response["finish_reason"] = response_body["stop_reason"]
elif provider == "cohere":

View file

@ -1,38 +1,284 @@
# @pytest.mark.skip(reason="AWS Suspended Account")
# import sys, os
# import traceback
# from dotenv import load_dotenv
#
# load_dotenv()
# import os, io
#
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
# from litellm import RateLimitError
#
# # litellm.num_retries = 3
# litellm.cache = None
# litellm.success_callback = []
# user_message = "Write a short poem about the sky"
# messages = [{"content": user_message, "role": "user"}]
#
#
# @pytest.fixture(autouse=True)
# def reset_callbacks():
# print("\npytest fixture - resetting callbacks")
# litellm.success_callback = []
# litellm._async_success_callback = []
# litellm.failure_callback = []
# litellm.callbacks = []
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
from litellm import RateLimitError
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
# def test_completion_bedrock_claude_completion_auth():
# print("calling bedrock claude completion params auth")
# import os
@pytest.fixture(autouse=True)
def reset_callbacks():
    """Autouse fixture that sets each litellm callback list to a new empty list."""
    print("\npytest fixture - resetting callbacks")
    callback_attrs = (
        "success_callback",
        "_async_success_callback",
        "failure_callback",
        "callbacks",
    )
    for attr_name in callback_attrs:
        # A fresh list per attribute, so the four lists are never shared.
        setattr(litellm, attr_name, [])
def test_completion_bedrock_claude_completion_auth():
    """Call bedrock claude-instant with AWS credentials passed as keyword arguments.

    The AWS_* variables are read from os.environ, removed from it before the
    call, and handed to completion() explicitly. They are put back in a
    ``finally`` clause so that a rate limit or a failure does not leave the
    process environment stripped for whatever runs next.
    """
    print("calling bedrock claude completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore on every exit path, including RateLimitError and pytest.fail.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
# test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth():
    """Call bedrock claude-v2:1 with AWS credentials passed as keyword arguments.

    The AWS_* variables are read from os.environ, removed from it before the
    call, and handed to completion() explicitly. They are put back in a
    ``finally`` clause so that a rate limit or a failure does not leave the
    process environment stripped for whatever runs next.
    """
    print("calling bedrock claude 2.1 completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        response = completion(
            model="bedrock/anthropic.claude-v2:1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore on every exit path, including RateLimitError and pytest.fail.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
# test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth():
    """Call bedrock claude-instant through a caller-built boto3 client.

    The AWS_* variables are read from os.environ and removed from it; the
    values are used to build a ``bedrock-runtime`` boto3 client that is passed
    to completion() as ``aws_bedrock_client``. The variables are put back in a
    ``finally`` clause so that a rate limit or a failure does not leave the
    process environment stripped for whatever runs next.
    """
    print("\ncalling bedrock claude external client auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        import boto3

        litellm.set_verbose = True

        bedrock = boto3.client(
            service_name="bedrock-runtime",
            region_name=aws_region_name,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
        )

        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_bedrock_client=bedrock,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore on every exit path, including RateLimitError and pytest.fail.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
# test_completion_bedrock_claude_external_client_auth()
@pytest.mark.skip(reason="Expired token, need to renew")
def test_completion_bedrock_claude_sts_client_auth():
    """Run a completion, an embedding and a second completion with role-based AWS kwargs.

    Every call receives the temporary access key, secret key, role name and a
    fixed session name. Only the last response is printed.
    """
    print("\ncalling bedrock claude external client auth")
    import os

    temp_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
    temp_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
    region_from_env = os.environ["AWS_REGION_NAME"]
    temp_role_name = os.environ["AWS_TEMP_ROLE_NAME"]

    # Keyword arguments shared by all three calls below.
    role_kwargs = {
        "aws_access_key_id": temp_access_key_id,
        "aws_secret_access_key": temp_secret_access_key,
        "aws_role_name": temp_role_name,
        "aws_session_name": "my-test-session",
    }

    try:
        import boto3

        litellm.set_verbose = True

        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_region_name=region_from_env,
            **role_kwargs,
        )

        response = embedding(
            model="cohere.embed-multilingual-v3",
            input=["hello world"],
            aws_region_name="us-east-1",
            **role_kwargs,
        )

        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            aws_region_name="us-east-1",
            **role_kwargs,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# test_completion_bedrock_claude_sts_client_auth()
def test_bedrock_claude_3():
    """Request a short completion from claude-3 sonnet on bedrock and check it is non-empty.

    A RateLimitError is tolerated; any other exception, including a failed
    assertion, is reported through pytest.fail.
    """
    try:
        litellm.set_verbose = True
        response: ModelResponse = completion(
            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
            messages=messages,
            max_tokens=10,
        )
        # Add any assertions here to check the response
        choices = response.choices
        assert len(choices) > 0
        first_content = choices[0].message.content
        assert len(first_content) > 0
    except RateLimitError:
        pass
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
def test_provisioned_throughput():
    """Check that ``model_id`` is sent as the ``modelId`` of the invoke_model request.

    A botocore Stubber is attached to a bedrock-runtime client; it expects an
    ``invoke_model`` call with ``modelId`` "provisioned-model-arn" and replies
    with a canned completion body. Any exception fails the test.
    """
    try:
        litellm.set_verbose = True
        import botocore
        import json
        import io
        import botocore.session
        from botocore.stub import Stubber

        session = botocore.session.get_session()
        bedrock_client = session.create_client(
            "bedrock-runtime", region_name="us-east-1"
        )

        # Exact request the stub accepts; the body string must match byte-for-byte.
        expected_params = {
            "accept": "application/json",
            "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
            '"max_tokens_to_sample": 256}',
            "contentType": "application/json",
            "modelId": "provisioned-model-arn",
        }

        canned_body = json.dumps(
            {
                "completion": " Here is a short poem about the sky:",
                "stop_reason": "max_tokens",
                "stop": None,
            }
        )
        response_from_bedrock = {
            "body": io.StringIO(canned_body),
            "contentType": "contentType",
            "ResponseMetadata": {"HTTPStatusCode": 200},
        }

        with Stubber(bedrock_client) as stubber:
            stubber.add_response(
                "invoke_model",
                service_response=response_from_bedrock,
                expected_params=expected_params,
            )
            response = litellm.completion(
                model="bedrock/anthropic.claude-instant-v1",
                model_id="provisioned-model-arn",
                messages=[{"content": "Hello, how are you?", "role": "user"}],
                aws_bedrock_client=bedrock_client,
            )
            print("response stubbed", response)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# test_provisioned_throughput()
def test_completion_bedrock_mistral_completion_auth():
print("calling bedrock mistral completion params auth")
import os
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
@ -41,253 +287,24 @@
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
try:
response: ModelResponse = completion(
model="bedrock/mistral.mistral-7b-instruct-v0:2",
messages=messages,
max_tokens=10,
temperature=0.1,
)
# Add any assertions here to check the response
assert len(response.choices) > 0
assert len(response.choices[0].message.content) > 0
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# # test_completion_bedrock_claude_completion_auth()
# def test_completion_bedrock_claude_2_1_completion_auth():
# print("calling bedrock claude 2.1 completion params auth")
# import os
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# response = completion(
# model="bedrock/anthropic.claude-v2:1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# # test_completion_bedrock_claude_2_1_completion_auth()
# def test_completion_bedrock_claude_external_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# import boto3
# litellm.set_verbose = True
# bedrock = boto3.client(
# service_name="bedrock-runtime",
# region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
# )
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_bedrock_client=bedrock,
# )
# # Add any assertions here to check the response
# print(response)
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# # test_completion_bedrock_claude_external_client_auth()
# @pytest.mark.skip(reason="Expired token, need to renew")
# def test_completion_bedrock_claude_sts_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# try:
# import boto3
# litellm.set_verbose = True
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# response = embedding(
# model="cohere.embed-multilingual-v3",
# input=["hello world"],
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# response = completion(
# model="gpt-3.5-turbo",
# messages=messages,
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# # Add any assertions here to check the response
# print(response)
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# # test_completion_bedrock_claude_sts_client_auth()
# def test_provisioned_throughput():
# try:
# litellm.set_verbose = True
# import botocore, json, io
# import botocore.session
# from botocore.stub import Stubber
# bedrock_client = botocore.session.get_session().create_client(
# "bedrock-runtime", region_name="us-east-1"
# )
# expected_params = {
# "accept": "application/json",
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
# '"max_tokens_to_sample": 256}',
# "contentType": "application/json",
# "modelId": "provisioned-model-arn",
# }
# response_from_bedrock = {
# "body": io.StringIO(
# json.dumps(
# {
# "completion": " Here is a short poem about the sky:",
# "stop_reason": "max_tokens",
# "stop": None,
# }
# )
# ),
# "contentType": "contentType",
# "ResponseMetadata": {"HTTPStatusCode": 200},
# }
# with Stubber(bedrock_client) as stubber:
# stubber.add_response(
# "invoke_model",
# service_response=response_from_bedrock,
# expected_params=expected_params,
# )
# response = litellm.completion(
# model="bedrock/anthropic.claude-instant-v1",
# model_id="provisioned-model-arn",
# messages=[{"content": "Hello, how are you?", "role": "user"}],
# aws_bedrock_client=bedrock_client,
# )
# print("response stubbed", response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# # test_provisioned_throughput()
# def test_completion_bedrock_mistral_completion_auth():
# print("calling bedrock mistral completion params auth")
# import os
#
# # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# # aws_region_name = os.environ["AWS_REGION_NAME"]
#
# # os.environ.pop("AWS_ACCESS_KEY_ID", None)
# # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# # os.environ.pop("AWS_REGION_NAME", None)
# try:
# response:ModelResponse = completion(
# model="bedrock/mistral.mistral-7b-instruct-v0:2",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# )
# # Add any assertions here to check the response
# assert len(response.choices) > 0
# assert len(response.choices[0].message.content) > 0
#
# # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# # os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
#
#
# test_completion_bedrock_mistral_completion_auth()

View file

@ -546,7 +546,6 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock():
import asyncio

View file

@ -1648,7 +1648,6 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response():
try:
response = completion(
@ -1674,7 +1673,6 @@ def test_completion_bedrock_titan_null_response():
pytest.fail(f"An error occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan():
try:
response = completion(
@ -1696,7 +1694,6 @@ def test_completion_bedrock_titan():
# test_completion_bedrock_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude():
print("calling claude")
try:
@ -1718,7 +1715,6 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere():
print("calling bedrock cohere")
litellm.set_verbose = True

View file

@ -171,7 +171,6 @@ def test_cost_openai_image_gen():
assert cost == 0.019922944
@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing():
"""
- get pricing specific to region for a model

View file

@ -478,7 +478,6 @@ async def test_async_chat_azure_stream():
## Test Bedrock + sync
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
@ -519,7 +518,6 @@ def test_chat_bedrock_stream():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_bedrock_stream():
try:
@ -796,7 +794,6 @@ async def test_async_embedding_azure():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_embedding_bedrock():
try:

View file

@ -256,7 +256,6 @@ async def test_vertexai_aembedding():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan():
try:
# this tests if we support str input for bedrock embedding
@ -302,7 +301,6 @@ def test_bedrock_embedding_titan():
# test_bedrock_embedding_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_cohere():
try:
litellm.set_verbose = False

View file

@ -121,7 +121,6 @@ async def test_async_image_generation_azure():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock():
try:
litellm.set_verbose = True
@ -142,7 +141,6 @@ def test_image_generation_bedrock():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params():
try:

View file

@ -515,7 +515,6 @@ def sagemaker_test_completion():
# Bedrock
@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion():
litellm.AmazonCohereConfig(max_tokens=10)
# litellm.set_verbose=True

View file

@ -125,7 +125,6 @@ def test_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding(client_no_auth):
global headers
from litellm.proxy.proxy_server import user_custom_auth

View file

@ -575,7 +575,6 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router():
litellm.set_verbose = True
print("\n Testing bedrock on router\n")

View file

@ -87,7 +87,6 @@ def test_router_timeouts():
print("********** TOKENS USED SO FAR = ", total_tokens_used)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
import openai

View file

@ -764,7 +764,6 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream():
try:
litellm.set_verbose = False
@ -811,7 +810,6 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream():
try:
litellm.set_verbose = False

View file

@ -4513,20 +4513,24 @@ def get_optional_params(
if stream:
optional_params["stream"] = stream
elif "anthropic" in model:
supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
supported_params = get_mapped_model_params(
model=model, custom_llm_provider=custom_llm_provider
)
_check_valid_arg(supported_params=supported_params)
# anthropic params on bedrock
# \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
if max_tokens is not None:
optional_params["max_tokens_to_sample"] = max_tokens
if temperature is not None:
optional_params["temperature"] = temperature
if top_p is not None:
optional_params["top_p"] = top_p
if stop is not None:
optional_params["stop_sequences"] = stop
if stream:
optional_params["stream"] = stream
if model.startswith("anthropic.claude-3"):
optional_params = (
litellm.AmazonAnthropicClaude3Config.map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
)
)
else:
optional_params = litellm.AmazonAnthropicConfig.map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
)
elif "amazon" in model: # amazon titan llms
supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
_check_valid_arg(supported_params=supported_params)
@ -4991,6 +4995,17 @@ def get_optional_params(
return optional_params
def get_mapped_model_params(model: str, custom_llm_provider: str):
    """
    Return the supported OpenAI params for a bedrock anthropic model.

    Models whose name starts with "anthropic.claude-3" use
    AmazonAnthropicClaude3Config; every other model uses AmazonAnthropicConfig.
    Returns None when custom_llm_provider is not "bedrock".
    """
    if custom_llm_provider != "bedrock":
        return None
    config_cls = (
        litellm.AmazonAnthropicClaude3Config
        if model.startswith("anthropic.claude-3")
        else litellm.AmazonAnthropicConfig
    )
    return config_cls().get_supported_openai_params()
def get_llm_provider(
model: str,
custom_llm_provider: Optional[str] = None,