forked from phoenix/litellm-mirror
fix(bedrock.py): support anthropic messages api on bedrock (claude-3)
This commit is contained in:
parent
0ac652a771
commit
478307d4cf
15 changed files with 381 additions and 307 deletions
|
@ -5,7 +5,13 @@ import time
|
|||
from typing import Callable, Optional, Any, Union, List
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, get_secret, Usage, ImageResponse
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from .prompt_templates.factory import (
|
||||
prompt_factory,
|
||||
custom_prompt,
|
||||
construct_tool_use_system_prompt,
|
||||
extract_between_tags,
|
||||
parse_xml_params,
|
||||
)
|
||||
import httpx
|
||||
|
||||
|
||||
|
@ -81,7 +87,7 @@ class AmazonAnthropicClaude3Config:
|
|||
"""
|
||||
|
||||
max_tokens: Optional[int] = litellm.max_tokens
|
||||
anthropic_version: Optional[str] = None
|
||||
anthropic_version: Optional[str] = "bedrock-2023-05-31"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -111,6 +117,15 @@ class AmazonAnthropicClaude3Config:
|
|||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
    """Return the OpenAI-style parameter names accepted by this config.

    Only ``max_tokens`` is listed.
    """
    supported_params = ["max_tokens"]
    return supported_params
|
||||
|
||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
    """Copy the supported OpenAI-style params into ``optional_params``.

    Only ``max_tokens`` is forwarded (under the same key); every other entry
    of ``non_default_params`` is ignored.

    Args:
        non_default_params: OpenAI-style params supplied by the caller.
        optional_params: dict that is updated in place.

    Returns:
        The same ``optional_params`` object that was passed in.
    """
    if "max_tokens" in non_default_params:
        optional_params["max_tokens"] = non_default_params["max_tokens"]
    return optional_params
|
||||
|
||||
|
||||
class AmazonAnthropicConfig:
|
||||
"""
|
||||
|
@ -165,6 +180,25 @@ class AmazonAnthropicConfig:
|
|||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
    """Return the OpenAI-style parameter names accepted by this config."""
    supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
    return supported_params
|
||||
|
||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
    """Translate OpenAI-style params into the keys stored in ``optional_params``.

    ``max_tokens`` is written as ``max_tokens_to_sample`` and ``stop`` as
    ``stop_sequences``; ``temperature`` and ``top_p`` keep their names.
    ``stream`` is copied only when its value compares equal to ``True``.
    Any other parameter is ignored.

    Args:
        non_default_params: OpenAI-style params supplied by the caller.
        optional_params: dict that is updated in place.

    Returns:
        The same ``optional_params`` object that was passed in.
    """
    renamed_keys = {
        "max_tokens": "max_tokens_to_sample",
        "temperature": "temperature",
        "top_p": "top_p",
        "stop": "stop_sequences",
    }
    for name, setting in non_default_params.items():
        target_key = renamed_keys.get(name)
        if target_key is not None:
            optional_params[target_key] = setting
        elif name == "stream" and setting == True:  # noqa: E712 - equality check kept on purpose
            optional_params["stream"] = setting
    return optional_params
|
||||
|
||||
|
||||
class AmazonCohereConfig:
|
||||
"""
|
||||
|
@ -664,7 +698,20 @@ def completion(
|
|||
inference_params = copy.deepcopy(optional_params)
|
||||
stream = inference_params.pop("stream", False)
|
||||
if provider == "anthropic":
|
||||
if model == "anthropic.claude-3":
|
||||
if model.startswith("anthropic.claude-3"):
|
||||
# Separate system prompt from rest of message
|
||||
system_prompt_idx: Optional[int] = None
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
inference_params["system"] = message["content"]
|
||||
system_prompt_idx = idx
|
||||
break
|
||||
if system_prompt_idx is not None:
|
||||
messages.pop(system_prompt_idx)
|
||||
# Format rest of message according to anthropic guidelines
|
||||
messages = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||
)
|
||||
## LOAD CONFIG
|
||||
config = litellm.AmazonAnthropicClaude3Config.get_config()
|
||||
for k, v in config.items():
|
||||
|
@ -672,7 +719,17 @@ def completion(
|
|||
k not in inference_params
|
||||
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
inference_params[k] = v
|
||||
data = json.dumps({"prompt": prompt, **inference_params})
|
||||
## Handle Tool Calling
|
||||
if "tools" in inference_params:
|
||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||
tools=inference_params["tools"]
|
||||
)
|
||||
inference_params["system"] = (
|
||||
inference_params.get("system", "\n")
|
||||
+ tool_calling_system_prompt
|
||||
) # add the anthropic tool calling prompt to the system prompt
|
||||
inference_params.pop("tools")
|
||||
data = json.dumps({"messages": messages, **inference_params})
|
||||
else:
|
||||
## LOAD CONFIG
|
||||
config = litellm.AmazonAnthropicConfig.get_config()
|
||||
|
@ -838,6 +895,10 @@ def completion(
|
|||
if provider == "ai21":
|
||||
outputText = response_body.get("completions")[0].get("data").get("text")
|
||||
elif provider == "anthropic":
|
||||
if model.startswith("anthropic.claude-3"):
|
||||
outputText = response_body.get("content")[0].get("text", None)
|
||||
model_response["finish_reason"] = response_body["stop_reason"]
|
||||
else:
|
||||
outputText = response_body["completion"]
|
||||
model_response["finish_reason"] = response_body["stop_reason"]
|
||||
elif provider == "cohere":
|
||||
|
|
|
@ -1,38 +1,284 @@
|
|||
# @pytest.mark.skip(reason="AWS Suspended Account")
|
||||
# import sys, os
|
||||
# import traceback
|
||||
# from dotenv import load_dotenv
|
||||
#
|
||||
# load_dotenv()
|
||||
# import os, io
|
||||
#
|
||||
# sys.path.insert(
|
||||
# 0, os.path.abspath("../..")
|
||||
# ) # Adds the parent directory to the system path
|
||||
# import pytest
|
||||
# import litellm
|
||||
# from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
|
||||
# from litellm import RateLimitError
|
||||
#
|
||||
# # litellm.num_retries = 3
|
||||
# litellm.cache = None
|
||||
# litellm.success_callback = []
|
||||
# user_message = "Write a short poem about the sky"
|
||||
# messages = [{"content": user_message, "role": "user"}]
|
||||
#
|
||||
#
|
||||
# @pytest.fixture(autouse=True)
|
||||
# def reset_callbacks():
|
||||
# print("\npytest fixture - resetting callbacks")
|
||||
# litellm.success_callback = []
|
||||
# litellm._async_success_callback = []
|
||||
# litellm.failure_callback = []
|
||||
# litellm.callbacks = []
|
||||
import sys, os
|
||||
import traceback
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import os, io
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
import litellm
|
||||
from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
|
||||
from litellm import RateLimitError
|
||||
|
||||
# litellm.num_retries = 3
|
||||
litellm.cache = None
|
||||
litellm.success_callback = []
|
||||
user_message = "Write a short poem about the sky"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
|
||||
|
||||
# def test_completion_bedrock_claude_completion_auth():
|
||||
# print("calling bedrock claude completion params auth")
|
||||
# import os
|
||||
@pytest.fixture(autouse=True)
def reset_callbacks():
    """Autouse fixture: reset litellm's callback lists to fresh empty lists."""
    print("\npytest fixture - resetting callbacks")
    for callback_attr in (
        "success_callback",
        "_async_success_callback",
        "failure_callback",
        "callbacks",
    ):
        # A new list per attribute, exactly as separate `= []` assignments would give.
        setattr(litellm, callback_attr, [])
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_completion_auth():
    """Call Bedrock Claude Instant with AWS credentials passed as keyword arguments.

    The three AWS variables are read from ``os.environ``, removed from it for
    the duration of the ``completion`` call, and handed to ``completion``
    explicitly. A ``RateLimitError`` is tolerated; any other exception fails
    the test.
    """
    print("calling bedrock claude completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore the environment on every path (success, rate limit, failure);
        # otherwise later tests that read these variables would hit a KeyError.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_completion_auth()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_2_1_completion_auth():
    """Call Bedrock Claude v2:1 with AWS credentials passed as keyword arguments.

    The three AWS variables are read from ``os.environ``, removed from it for
    the duration of the ``completion`` call, and handed to ``completion``
    explicitly. A ``RateLimitError`` is tolerated; any other exception fails
    the test.
    """
    print("calling bedrock claude 2.1 completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        response = completion(
            model="bedrock/anthropic.claude-v2:1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore the environment on every path (success, rate limit, failure);
        # otherwise later tests that read these variables would hit a KeyError.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_2_1_completion_auth()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_external_client_auth():
    """Call Bedrock Claude Instant through a caller-constructed boto3 client.

    The AWS variables are read from ``os.environ`` and removed from it, a
    ``bedrock-runtime`` boto3 client is built from the saved values, and that
    client is passed to ``completion`` via ``aws_bedrock_client``. A
    ``RateLimitError`` is tolerated; any other exception fails the test.
    """
    print("\ncalling bedrock claude external client auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        import boto3

        litellm.set_verbose = True

        bedrock = boto3.client(
            service_name="bedrock-runtime",
            region_name=aws_region_name,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
        )

        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_bedrock_client=bedrock,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # Restore the environment on every path (success, rate limit, failure);
        # otherwise later tests that read these variables would hit a KeyError.
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_external_client_auth()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Expired token, need to renew")
def test_completion_bedrock_claude_sts_client_auth():
    """Exercise role-based AWS auth kwargs on completion and embedding calls.

    Reads temporary credentials and a role name from the environment, then
    issues a Bedrock completion, a Cohere embedding, and a ``gpt-3.5-turbo``
    completion, each with the same credential/role/session keyword arguments.
    A ``RateLimitError`` is tolerated; any other exception fails the test.
    """
    print("\ncalling bedrock claude external client auth")
    import os

    aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]
    aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]

    # Keyword arguments shared by all three calls below.
    sts_kwargs = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "aws_role_name": aws_role_name,
        "aws_session_name": "my-test-session",
    }

    try:
        import boto3

        litellm.set_verbose = True

        completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_region_name=aws_region_name,
            **sts_kwargs,
        )

        embedding(
            model="cohere.embed-multilingual-v3",
            input=["hello world"],
            aws_region_name="us-east-1",
            **sts_kwargs,
        )

        final_response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            aws_region_name="us-east-1",
            **sts_kwargs,
        )
        # Add any assertions here to check the response
        print(final_response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_bedrock_claude_sts_client_auth()
|
||||
|
||||
|
||||
def test_bedrock_claude_3():
    """Request a short completion from Claude 3 Sonnet on Bedrock.

    Asserts that at least one choice comes back and that its message content
    is non-empty. A ``RateLimitError`` is tolerated; any other exception
    (including a failed assertion) fails the test via ``pytest.fail``.
    """
    request_kwargs = {
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "messages": messages,
        "max_tokens": 10,
    }
    try:
        litellm.set_verbose = True
        response: ModelResponse = completion(**request_kwargs)
        # Add any assertions here to check the response
        assert len(response.choices) > 0
        assert len(response.choices[0].message.content) > 0
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_provisioned_throughput():
    """Check that ``model_id`` is sent as the Bedrock ``modelId``.

    A botocore ``Stubber`` is attached to a ``bedrock-runtime`` client and
    primed with one ``invoke_model`` response plus the exact request
    parameters expected (``modelId`` set to ``"provisioned-model-arn"``).
    ``litellm.completion`` is then called with that client. Any exception
    fails the test.
    """
    try:
        litellm.set_verbose = True
        import botocore, json, io
        import botocore.session
        from botocore.stub import Stubber

        session = botocore.session.get_session()
        bedrock_client = session.create_client(
            "bedrock-runtime", region_name="us-east-1"
        )

        # Exact JSON request body expected for the call below.
        request_body = (
            '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
            '"max_tokens_to_sample": 256}'
        )
        expected_params = {
            "accept": "application/json",
            "body": request_body,
            "contentType": "application/json",
            "modelId": "provisioned-model-arn",
        }

        # Canned payload returned by the stubbed invoke_model call.
        canned_payload = json.dumps(
            {
                "completion": " Here is a short poem about the sky:",
                "stop_reason": "max_tokens",
                "stop": None,
            }
        )
        response_from_bedrock = {
            "body": io.StringIO(canned_payload),
            "contentType": "contentType",
            "ResponseMetadata": {"HTTPStatusCode": 200},
        }

        with Stubber(bedrock_client) as stubber:
            stubber.add_response(
                "invoke_model",
                service_response=response_from_bedrock,
                expected_params=expected_params,
            )
            response = litellm.completion(
                model="bedrock/anthropic.claude-instant-v1",
                model_id="provisioned-model-arn",
                messages=[{"content": "Hello, how are you?", "role": "user"}],
                aws_bedrock_client=bedrock_client,
            )
            print("response stubbed", response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_provisioned_throughput()
|
||||
|
||||
|
||||
def test_completion_bedrock_mistral_completion_auth():
|
||||
print("calling bedrock mistral completion params auth")
|
||||
import os
|
||||
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
|
@ -41,253 +287,24 @@
|
|||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_region_name=aws_region_name,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
try:
|
||||
response: ModelResponse = completion(
|
||||
model="bedrock/mistral.mistral-7b-instruct-v0:2",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
assert len(response.choices) > 0
|
||||
assert len(response.choices[0].message.content) > 0
|
||||
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# # test_completion_bedrock_claude_completion_auth()
|
||||
|
||||
|
||||
# def test_completion_bedrock_claude_2_1_completion_auth():
|
||||
# print("calling bedrock claude 2.1 completion params auth")
|
||||
# import os
|
||||
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
|
||||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
# try:
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-v2:1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_region_name=aws_region_name,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# # test_completion_bedrock_claude_2_1_completion_auth()
|
||||
|
||||
|
||||
# def test_completion_bedrock_claude_external_client_auth():
|
||||
# print("\ncalling bedrock claude external client auth")
|
||||
# import os
|
||||
|
||||
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
|
||||
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# os.environ.pop("AWS_REGION_NAME", None)
|
||||
|
||||
# try:
|
||||
# import boto3
|
||||
|
||||
# litellm.set_verbose = True
|
||||
|
||||
# bedrock = boto3.client(
|
||||
# service_name="bedrock-runtime",
|
||||
# region_name=aws_region_name,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
|
||||
# )
|
||||
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_bedrock_client=bedrock,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
|
||||
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# # test_completion_bedrock_claude_external_client_auth()
|
||||
|
||||
|
||||
# @pytest.mark.skip(reason="Expired token, need to renew")
|
||||
# def test_completion_bedrock_claude_sts_client_auth():
|
||||
# print("\ncalling bedrock claude external client auth")
|
||||
# import os
|
||||
|
||||
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
|
||||
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
|
||||
# aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
|
||||
# try:
|
||||
# import boto3
|
||||
|
||||
# litellm.set_verbose = True
|
||||
|
||||
# response = completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# aws_region_name=aws_region_name,
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
|
||||
# response = embedding(
|
||||
# model="cohere.embed-multilingual-v3",
|
||||
# input=["hello world"],
|
||||
# aws_region_name="us-east-1",
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
|
||||
# response = completion(
|
||||
# model="gpt-3.5-turbo",
|
||||
# messages=messages,
|
||||
# aws_region_name="us-east-1",
|
||||
# aws_access_key_id=aws_access_key_id,
|
||||
# aws_secret_access_key=aws_secret_access_key,
|
||||
# aws_role_name=aws_role_name,
|
||||
# aws_session_name="my-test-session",
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# print(response)
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# # test_completion_bedrock_claude_sts_client_auth()
|
||||
|
||||
|
||||
# def test_provisioned_throughput():
|
||||
# try:
|
||||
# litellm.set_verbose = True
|
||||
# import botocore, json, io
|
||||
# import botocore.session
|
||||
# from botocore.stub import Stubber
|
||||
|
||||
# bedrock_client = botocore.session.get_session().create_client(
|
||||
# "bedrock-runtime", region_name="us-east-1"
|
||||
# )
|
||||
|
||||
# expected_params = {
|
||||
# "accept": "application/json",
|
||||
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
|
||||
# '"max_tokens_to_sample": 256}',
|
||||
# "contentType": "application/json",
|
||||
# "modelId": "provisioned-model-arn",
|
||||
# }
|
||||
# response_from_bedrock = {
|
||||
# "body": io.StringIO(
|
||||
# json.dumps(
|
||||
# {
|
||||
# "completion": " Here is a short poem about the sky:",
|
||||
# "stop_reason": "max_tokens",
|
||||
# "stop": None,
|
||||
# }
|
||||
# )
|
||||
# ),
|
||||
# "contentType": "contentType",
|
||||
# "ResponseMetadata": {"HTTPStatusCode": 200},
|
||||
# }
|
||||
|
||||
# with Stubber(bedrock_client) as stubber:
|
||||
# stubber.add_response(
|
||||
# "invoke_model",
|
||||
# service_response=response_from_bedrock,
|
||||
# expected_params=expected_params,
|
||||
# )
|
||||
# response = litellm.completion(
|
||||
# model="bedrock/anthropic.claude-instant-v1",
|
||||
# model_id="provisioned-model-arn",
|
||||
# messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||
# aws_bedrock_client=bedrock_client,
|
||||
# )
|
||||
# print("response stubbed", response)
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# # test_provisioned_throughput()
|
||||
|
||||
|
||||
# def test_completion_bedrock_mistral_completion_auth():
|
||||
# print("calling bedrock mistral completion params auth")
|
||||
# import os
|
||||
#
|
||||
# # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
|
||||
# # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
|
||||
# # aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
#
|
||||
# # os.environ.pop("AWS_ACCESS_KEY_ID", None)
|
||||
# # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
|
||||
# # os.environ.pop("AWS_REGION_NAME", None)
|
||||
# try:
|
||||
# response:ModelResponse = completion(
|
||||
# model="bedrock/mistral.mistral-7b-instruct-v0:2",
|
||||
# messages=messages,
|
||||
# max_tokens=10,
|
||||
# temperature=0.1,
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# assert len(response.choices) > 0
|
||||
# assert len(response.choices[0].message.content) > 0
|
||||
#
|
||||
# # os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
|
||||
# # os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
|
||||
# # os.environ["AWS_REGION_NAME"] = aws_region_name
|
||||
# except RateLimitError:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
#
|
||||
#
|
||||
# test_completion_bedrock_mistral_completion_auth()
|
|
@ -546,7 +546,6 @@ def test_redis_cache_acompletion_stream():
|
|||
# test_redis_cache_acompletion_stream()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_redis_cache_acompletion_stream_bedrock():
|
||||
import asyncio
|
||||
|
||||
|
|
|
@ -1648,7 +1648,6 @@ def test_completion_chat_sagemaker_mistral():
|
|||
# test_completion_chat_sagemaker_mistral()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_titan_null_response():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -1674,7 +1673,6 @@ def test_completion_bedrock_titan_null_response():
|
|||
pytest.fail(f"An error occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_titan():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -1696,7 +1694,6 @@ def test_completion_bedrock_titan():
|
|||
# test_completion_bedrock_titan()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_claude():
|
||||
print("calling claude")
|
||||
try:
|
||||
|
@ -1718,7 +1715,6 @@ def test_completion_bedrock_claude():
|
|||
# test_completion_bedrock_claude()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_cohere():
|
||||
print("calling bedrock cohere")
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -171,7 +171,6 @@ def test_cost_openai_image_gen():
|
|||
assert cost == 0.019922944
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_cost_bedrock_pricing():
|
||||
"""
|
||||
- get pricing specific to region for a model
|
||||
|
|
|
@ -478,7 +478,6 @@ async def test_async_chat_azure_stream():
|
|||
|
||||
|
||||
## Test Bedrock + sync
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_chat_bedrock_stream():
|
||||
try:
|
||||
customHandler = CompletionCustomHandler()
|
||||
|
@ -519,7 +518,6 @@ def test_chat_bedrock_stream():
|
|||
|
||||
|
||||
## Test Bedrock + Async
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_chat_bedrock_stream():
|
||||
try:
|
||||
|
@ -796,7 +794,6 @@ async def test_async_embedding_azure():
|
|||
|
||||
|
||||
## Test Bedrock + Async
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_embedding_bedrock():
|
||||
try:
|
||||
|
|
|
@ -256,7 +256,6 @@ async def test_vertexai_aembedding():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding_titan():
|
||||
try:
|
||||
# this tests if we support str input for bedrock embedding
|
||||
|
@ -302,7 +301,6 @@ def test_bedrock_embedding_titan():
|
|||
# test_bedrock_embedding_titan()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding_cohere():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
|
|
@ -121,7 +121,6 @@ async def test_async_image_generation_azure():
|
|||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_image_generation_bedrock():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
@ -142,7 +141,6 @@ def test_image_generation_bedrock():
|
|||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_aimage_generation_bedrock_with_optional_params():
|
||||
try:
|
||||
|
|
|
@ -515,7 +515,6 @@ def sagemaker_test_completion():
|
|||
# Bedrock
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def bedrock_test_completion():
|
||||
litellm.AmazonCohereConfig(max_tokens=10)
|
||||
# litellm.set_verbose=True
|
||||
|
|
|
@ -125,7 +125,6 @@ def test_embedding(client_no_auth):
|
|||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_embedding(client_no_auth):
|
||||
global headers
|
||||
from litellm.proxy.proxy_server import user_custom_auth
|
||||
|
|
|
@ -575,7 +575,6 @@ def test_azure_embedding_on_router():
|
|||
# test_azure_embedding_on_router()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_bedrock_on_router():
|
||||
litellm.set_verbose = True
|
||||
print("\n Testing bedrock on router\n")
|
||||
|
|
|
@ -87,7 +87,6 @@ def test_router_timeouts():
|
|||
print("********** TOKENS USED SO FAR = ", total_tokens_used)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_router_timeouts_bedrock():
|
||||
import openai
|
||||
|
|
|
@ -764,7 +764,6 @@ def test_completion_replicate_stream_bad_key():
|
|||
# test_completion_replicate_stream_bad_key()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_claude_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
@ -811,7 +810,6 @@ def test_completion_bedrock_claude_stream():
|
|||
# test_completion_bedrock_claude_stream()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
def test_completion_bedrock_ai21_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
|
|
|
@ -4513,20 +4513,24 @@ def get_optional_params(
|
|||
if stream:
|
||||
optional_params["stream"] = stream
|
||||
elif "anthropic" in model:
|
||||
supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
|
||||
supported_params = get_mapped_model_params(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
# anthropic params on bedrock
|
||||
# \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
|
||||
if max_tokens is not None:
|
||||
optional_params["max_tokens_to_sample"] = max_tokens
|
||||
if temperature is not None:
|
||||
optional_params["temperature"] = temperature
|
||||
if top_p is not None:
|
||||
optional_params["top_p"] = top_p
|
||||
if stop is not None:
|
||||
optional_params["stop_sequences"] = stop
|
||||
if stream:
|
||||
optional_params["stream"] = stream
|
||||
if model.startswith("anthropic.claude-3"):
|
||||
optional_params = (
|
||||
litellm.AmazonAnthropicClaude3Config.map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
)
|
||||
else:
|
||||
optional_params = litellm.AmazonAnthropicConfig.map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
elif "amazon" in model: # amazon titan llms
|
||||
supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
|
@ -4991,6 +4995,17 @@ def get_optional_params(
|
|||
return optional_params
|
||||
|
||||
|
||||
def get_mapped_model_params(model: str, custom_llm_provider: str):
    """
    Returns the supported openai params for a given model + provider

    For the "bedrock" provider, models whose name starts with
    "anthropic.claude-3" use AmazonAnthropicClaude3Config; every other model
    uses AmazonAnthropicConfig. For any other provider the result is None.
    """
    if custom_llm_provider != "bedrock":
        return None
    if model.startswith("anthropic.claude-3"):
        config_cls = litellm.AmazonAnthropicClaude3Config
    else:
        config_cls = litellm.AmazonAnthropicConfig
    return config_cls().get_supported_openai_params()
|
||||
|
||||
|
||||
def get_llm_provider(
|
||||
model: str,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue