test: removing aws tests - account suspended - pending their approval

This commit is contained in:
Krrish Dholakia 2024-02-28 13:46:20 -08:00
parent dccfdc241b
commit 4c951d20bc
18 changed files with 966 additions and 951 deletions

View file

@@ -1,253 +1,254 @@
import sys
import os
import io, asyncio
## @pytest.mark.skip(reason="AWS Suspended Account")
# import sys
# import os
# import io, asyncio
# import logging
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
# # import logging
# # logging.basicConfig(level=logging.DEBUG)
# sys.path.insert(0, os.path.abspath("../.."))
from litellm import completion
import litellm
# from litellm import completion
# import litellm
litellm.num_retries = 3
# litellm.num_retries = 3
import time, random
import pytest
# import time, random
# import pytest
def test_s3_logging():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
litellm.cache = litellm.Cache(
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
)
# def test_s3_logging():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
# )
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
litellm.set_verbose = True
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# litellm.set_verbose = True
print("Testing async s3 logging")
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
expected_keys.append(response.id)
print(f"response: {response}")
time.sleep(5) # wait 5s for logs to land
# response = asyncio.run(_test())
# expected_keys.append(response.id)
# print(f"response: {response}")
# time.sleep(5) # wait 5s for logs to land
import boto3
# import boto3
s3 = boto3.client("s3")
bucket_name = "litellm-logs"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# s3 = boto3.client("s3")
# bucket_name = "litellm-logs"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
# Sort the objects based on the LastModified timestamp
objects = sorted(
response["Contents"], key=lambda x: x["LastModified"], reverse=True
)
# Get the keys of the most recent objects
most_recent_keys = [obj["Key"] for obj in objects]
print(most_recent_keys)
# for each key, get the part before "-" as the key. Do it safely
cleaned_keys = []
for key in most_recent_keys:
split_key = key.split("_")
if len(split_key) < 2:
continue
cleaned_keys.append(split_key[1])
print("\n most recent keys", most_recent_keys)
print("\n cleaned keys", cleaned_keys)
print("\n Expected keys: ", expected_keys)
matches = 0
for key in expected_keys:
key += ".json"
assert key in cleaned_keys
# # Sort the objects based on the LastModified timestamp
# objects = sorted(
# response["Contents"], key=lambda x: x["LastModified"], reverse=True
# )
# # Get the keys of the most recent objects
# most_recent_keys = [obj["Key"] for obj in objects]
# print(most_recent_keys)
# # for each key, get the part before "-" as the key. Do it safely
# cleaned_keys = []
# for key in most_recent_keys:
# split_key = key.split("_")
# if len(split_key) < 2:
# continue
# cleaned_keys.append(split_key[1])
# print("\n most recent keys", most_recent_keys)
# print("\n cleaned keys", cleaned_keys)
# print("\n Expected keys: ", expected_keys)
# matches = 0
# for key in expected_keys:
# key += ".json"
# assert key in cleaned_keys
if key in cleaned_keys:
matches += 1
# remove the match key
cleaned_keys.remove(key)
# this asserts we log, the first request + the 2nd cached request
print("we had two matches ! passed ", matches)
assert matches == 2
try:
# cleanup s3 bucket in test
for key in most_recent_keys:
s3.delete_object(Bucket=bucket_name, Key=key)
except:
# don't let cleanup fail a test
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# if key in cleaned_keys:
# matches += 1
# # remove the match key
# cleaned_keys.remove(key)
# # this asserts we log, the first request + the 2nd cached request
# print("we had two matches ! passed ", matches)
# assert matches == 2
# try:
# # cleanup s3 bucket in test
# for key in most_recent_keys:
# s3.delete_object(Bucket=bucket_name, Key=key)
# except:
# # don't let cleanup fail a test
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")
# test_s3_logging()
# # test_s3_logging()
def test_s3_logging_async():
# this tests time added to make s3 logging calls, vs just acompletion calls
try:
litellm.set_verbose = True
# Make 5 calls with an empty success_callback
litellm.success_callback = []
start_time_empty_callback = asyncio.run(make_async_calls())
print("done with no callback test")
# def test_s3_logging_async():
# # this tests time added to make s3 logging calls, vs just acompletion calls
# try:
# litellm.set_verbose = True
# # Make 5 calls with an empty success_callback
# litellm.success_callback = []
# start_time_empty_callback = asyncio.run(make_async_calls())
# print("done with no callback test")
print("starting s3 logging load test")
# Make 5 calls with success_callback set to "langfuse"
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
start_time_s3 = asyncio.run(make_async_calls())
print("done with s3 test")
# print("starting s3 logging load test")
# # Make 5 calls with success_callback set to "langfuse"
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# start_time_s3 = asyncio.run(make_async_calls())
# print("done with s3 test")
# Compare the time for both scenarios
print(f"Time taken with success_callback='s3': {start_time_s3}")
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# # Compare the time for both scenarios
# print(f"Time taken with success_callback='s3': {start_time_s3}")
# print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# assert the diff is not more than 1 second
assert abs(start_time_s3 - start_time_empty_callback) < 1
# # assert the diff is not more than 1 second
# assert abs(start_time_s3 - start_time_empty_callback) < 1
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
# except litellm.Timeout as e:
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
async def make_async_calls():
tasks = []
for _ in range(5):
task = asyncio.create_task(
litellm.acompletion(
model="azure/chatgpt-v-2",
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=5,
temperature=0.7,
timeout=5,
user="langfuse_latency_test_user",
mock_response="It's simple to use and easy to get started",
)
)
tasks.append(task)
# async def make_async_calls():
# tasks = []
# for _ in range(5):
# task = asyncio.create_task(
# litellm.acompletion(
# model="azure/chatgpt-v-2",
# messages=[{"role": "user", "content": "This is a test"}],
# max_tokens=5,
# temperature=0.7,
# timeout=5,
# user="langfuse_latency_test_user",
# mock_response="It's simple to use and easy to get started",
# )
# )
# tasks.append(task)
# Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time()
# # Measure the start time before running the tasks
# start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete
responses = await asyncio.gather(*tasks)
# # Wait for all tasks to complete
# responses = await asyncio.gather(*tasks)
# Print the responses when tasks return
for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}")
# # Print the responses when tasks return
# for idx, response in enumerate(responses):
# print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time
# # Calculate the total time taken
# total_time = asyncio.get_event_loop().time() - start_time
return total_time
# return total_time
def test_s3_logging_r2():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# )
litellm.set_verbose = True
from litellm._logging import verbose_logger
import logging
# def test_s3_logging_r2():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# # litellm.cache = litellm.Cache(
# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# # )
# litellm.set_verbose = True
# from litellm._logging import verbose_logger
# import logging
verbose_logger.setLevel(level=logging.DEBUG)
# verbose_logger.setLevel(level=logging.DEBUG)
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-r2-bucket",
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
"s3_endpoint_url": "os.environ/R2_S3_URL",
"s3_region_name": "os.environ/R2_S3_REGION_NAME",
}
print("Testing async s3 logging")
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-r2-bucket",
# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
# "s3_endpoint_url": "os.environ/R2_S3_URL",
# "s3_region_name": "os.environ/R2_S3_REGION_NAME",
# }
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
import boto3
# import boto3
s3 = boto3.client(
"s3",
endpoint_url=os.getenv("R2_S3_URL"),
region_name=os.getenv("R2_S3_REGION_NAME"),
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
)
# s3 = boto3.client(
# "s3",
# endpoint_url=os.getenv("R2_S3_URL"),
# region_name=os.getenv("R2_S3_REGION_NAME"),
# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
# )
bucket_name = "litellm-r2-bucket"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# bucket_name = "litellm-r2-bucket"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")

View file

@@ -203,6 +203,7 @@ async def test_hf_completion_tgi():
# test_get_cloudflare_response_streaming()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
# litellm.set_verbose=True

View file

@@ -1,258 +1,258 @@
import sys, os
import traceback
from dotenv import load_dotenv
# import sys, os
# import traceback
# from dotenv import load_dotenv
load_dotenv()
import os, io
# load_dotenv()
# import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import embedding, completion, completion_cost, Timeout
# from litellm import RateLimitError
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
# # litellm.num_retries = 3
# litellm.cache = None
# litellm.success_callback = []
# user_message = "Write a short poem about the sky"
# messages = [{"content": user_message, "role": "user"}]
@pytest.fixture(autouse=True)
def reset_callbacks():
print("\npytest fixture - resetting callbacks")
litellm.success_callback = []
litellm._async_success_callback = []
litellm.failure_callback = []
litellm.callbacks = []
# @pytest.fixture(autouse=True)
# def reset_callbacks():
# print("\npytest fixture - resetting callbacks")
# litellm.success_callback = []
# litellm._async_success_callback = []
# litellm.failure_callback = []
# litellm.callbacks = []
def test_completion_bedrock_claude_completion_auth():
print("calling bedrock claude completion params auth")
import os
# def test_completion_bedrock_claude_completion_auth():
# print("calling bedrock claude completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# try:
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_completion_auth()
# # test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth():
print("calling bedrock claude 2.1 completion params auth")
import os
# def test_completion_bedrock_claude_2_1_completion_auth():
# print("calling bedrock claude 2.1 completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-v2:1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# response = completion(
# model="bedrock/anthropic.claude-v2:1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_2_1_completion_auth()
# # test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# def test_completion_bedrock_claude_external_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
)
# bedrock = boto3.client(
# service_name="bedrock-runtime",
# region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
# )
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_bedrock_client=bedrock,
)
# Add any assertions here to check the response
print(response)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_bedrock_client=bedrock,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_external_client_auth()
# # test_completion_bedrock_claude_external_client_auth()
@pytest.mark.skip(reason="Expired token, need to renew")
def test_completion_bedrock_claude_sts_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# @pytest.mark.skip(reason="Expired token, need to renew")
# def test_completion_bedrock_claude_sts_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = embedding(
model="cohere.embed-multilingual-v3",
input=["hello world"],
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = embedding(
# model="cohere.embed-multilingual-v3",
# input=["hello world"],
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = completion(
model="gpt-3.5-turbo",
messages=messages,
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# Add any assertions here to check the response
print(response)
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# response = completion(
# model="gpt-3.5-turbo",
# messages=messages,
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# # Add any assertions here to check the response
# print(response)
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_sts_client_auth()
# # test_completion_bedrock_claude_sts_client_auth()
def test_provisioned_throughput():
try:
litellm.set_verbose = True
import botocore, json, io
import botocore.session
from botocore.stub import Stubber
# def test_provisioned_throughput():
# try:
# litellm.set_verbose = True
# import botocore, json, io
# import botocore.session
# from botocore.stub import Stubber
bedrock_client = botocore.session.get_session().create_client(
"bedrock-runtime", region_name="us-east-1"
)
# bedrock_client = botocore.session.get_session().create_client(
# "bedrock-runtime", region_name="us-east-1"
# )
expected_params = {
"accept": "application/json",
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
'"max_tokens_to_sample": 256}',
"contentType": "application/json",
"modelId": "provisioned-model-arn",
}
response_from_bedrock = {
"body": io.StringIO(
json.dumps(
{
"completion": " Here is a short poem about the sky:",
"stop_reason": "max_tokens",
"stop": None,
}
)
),
"contentType": "contentType",
"ResponseMetadata": {"HTTPStatusCode": 200},
}
# expected_params = {
# "accept": "application/json",
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
# '"max_tokens_to_sample": 256}',
# "contentType": "application/json",
# "modelId": "provisioned-model-arn",
# }
# response_from_bedrock = {
# "body": io.StringIO(
# json.dumps(
# {
# "completion": " Here is a short poem about the sky:",
# "stop_reason": "max_tokens",
# "stop": None,
# }
# )
# ),
# "contentType": "contentType",
# "ResponseMetadata": {"HTTPStatusCode": 200},
# }
with Stubber(bedrock_client) as stubber:
stubber.add_response(
"invoke_model",
service_response=response_from_bedrock,
expected_params=expected_params,
)
response = litellm.completion(
model="bedrock/anthropic.claude-instant-v1",
model_id="provisioned-model-arn",
messages=[{"content": "Hello, how are you?", "role": "user"}],
aws_bedrock_client=bedrock_client,
)
print("response stubbed", response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# with Stubber(bedrock_client) as stubber:
# stubber.add_response(
# "invoke_model",
# service_response=response_from_bedrock,
# expected_params=expected_params,
# )
# response = litellm.completion(
# model="bedrock/anthropic.claude-instant-v1",
# model_id="provisioned-model-arn",
# messages=[{"content": "Hello, how are you?", "role": "user"}],
# aws_bedrock_client=bedrock_client,
# )
# print("response stubbed", response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_provisioned_throughput()
# # test_provisioned_throughput()

View file

@@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock():
import asyncio

View file

@@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
# test_customprompt_together_ai()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
try:
litellm.set_verbose = True
@@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
# test_completion_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
litellm.set_verbose = False
@@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
# test_completion_chat_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker_mistral():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response():
try:
response = completion(
@@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
pytest.fail(f"An error occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan():
try:
response = completion(
@@ -1568,6 +1574,7 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere():
print("calling bedrock cohere")
litellm.set_verbose = True

View file

@@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
assert cost == 0.019922944
@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing():
"""
- get pricing specific to region for a model

View file

@@ -478,6 +478,7 @@ async def test_async_chat_azure_stream():
## Test Bedrock + sync
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
@@ -518,6 +519,7 @@ def test_chat_bedrock_stream():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_bedrock_stream():
try:
@@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
## Test Sagemaker + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_sagemaker_stream():
try:
@@ -793,6 +796,7 @@ async def test_async_embedding_azure():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_embedding_bedrock():
try:

View file

@@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
# asyncio.run(test_async_custom_handler_embedding_optional_param())
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock():
"""

View file

@@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan():
try:
# this tests if we support str input for bedrock embedding
@@ -422,6 +423,7 @@ def test_aembedding_azure():
# test_aembedding_azure()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embeddings():
try:
response = litellm.embedding(
@@ -438,6 +440,7 @@ def test_sagemaker_embeddings():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_aembeddings():
try:

View file

@@ -42,6 +42,7 @@ exception_models = [
# Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
def test_context_window(model):
print("Testing context window error")
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
os.environ["AI21_API_KEY"] = "bad-key"
elif "togethercomputer" in model:
temporary_key = os.environ["TOGETHERAI_API_KEY"]
os.environ[
"TOGETHERAI_API_KEY"
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
os.environ["TOGETHERAI_API_KEY"] = (
"84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
)
elif model in litellm.openrouter_models:
temporary_key = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENROUTER_API_KEY"] = "bad-key"

View file

@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
# asyncio.run(test_azure_img_gen_health_check())
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_embedding_health_check():
response = await litellm.ahealth_check(

View file

@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock():
try:
litellm.set_verbose = True
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params():
try:

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,7 @@ import litellm
from litellm import completion
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
litellm.set_verbose = True
litellm.drop_params = True

View file

@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
# Sagemaker
@pytest.mark.skip(reason="AWS Suspended Account")
def sagemaker_test_completion():
litellm.SagemakerConfig(max_new_tokens=10)
# litellm.set_verbose=True
@ -514,6 +515,7 @@ def sagemaker_test_completion():
# Bedrock
@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion():
litellm.AmazonCohereConfig(max_tokens=10)
# litellm.set_verbose=True

View file

@ -166,14 +166,6 @@ def test_call_one_endpoint():
"tpm": 240000,
"rpm": 1800,
},
{
"model_name": "claude-v1",
"litellm_params": {
"model": "bedrock/anthropic.claude-instant-v1",
},
"tpm": 100000,
"rpm": 10000,
},
{
"model_name": "text-embedding-ada-002",
"litellm_params": {
@ -202,15 +194,6 @@ def test_call_one_endpoint():
)
print("\n response", response)
async def call_bedrock_claude():
response = await router.acompletion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{"role": "user", "content": "hello this request will pass"}],
specific_deployment=True,
)
print("\n response", response)
async def call_azure_embedding():
response = await router.aembedding(
model="azure/azure-embedding-model",
@ -221,7 +204,6 @@ def test_call_one_endpoint():
print("\n response", response)
asyncio.run(call_azure_completion())
asyncio.run(call_bedrock_claude())
asyncio.run(call_azure_embedding())
os.environ["AZURE_API_BASE"] = old_api_base
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router():
litellm.set_verbose = True
print("\n Testing bedrock on router\n")

View file

@ -87,6 +87,7 @@ def test_router_timeouts():
print("********** TOKENS USED SO FAR = ", total_tokens_used)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
import openai

View file

@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream():
try:
litellm.set_verbose = False
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream():
try:
litellm.set_verbose = False
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
# test_sagemaker_weird_response()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_streaming_async():
try:
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
# asyncio.run(test_sagemaker_streaming_async())
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
response = completion(