Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
test: removing aws tests - account suspended - pending their approval
Commit 4c951d20bc (parent dccfdc241b)
18 changed files with 966 additions and 951 deletions
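The pattern is the same across most of the touched test files: an @pytest.mark.skip marker is added directly above each AWS-dependent test, while two test modules are disabled by commenting them out wholesale. As a minimal sketch of the marker (the test name below is hypothetical), pytest still collects the function but reports it as skipped instead of running it:

import pytest


@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_placeholder():
    ...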
@@ -1,253 +1,254 @@
(Every line of this s3-logging test module is commented out by the commit: the new revision starts with "## @pytest.mark.skip(reason="AWS Suspended Account")" and re-emits each original line behind a leading "# ". The original contents were:)

import sys
import os
import io, asyncio

# import logging

# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))

from litellm import completion
import litellm

litellm.num_retries = 3

import time, random
import pytest


def test_s3_logging():
    # all s3 requests need to be in one test function
    # since we are modifying stdout, and pytests runs tests in parallel
    # on circle ci - we only test litellm.acompletion()
    try:
        # redirect stdout to log_file
        litellm.cache = litellm.Cache(
            type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
        )

        litellm.success_callback = ["s3"]
        litellm.s3_callback_params = {
            "s3_bucket_name": "litellm-logs",
            "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
            "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
        }
        litellm.set_verbose = True

        print("Testing async s3 logging")

        expected_keys = []

        import time

        curr_time = str(time.time())

        async def _test():
            return await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
                max_tokens=10,
                temperature=0.7,
                user="ishaan-2",
            )

        response = asyncio.run(_test())
        print(f"response: {response}")
        expected_keys.append(response.id)

        async def _test():
            return await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
                max_tokens=10,
                temperature=0.7,
                user="ishaan-2",
            )

        response = asyncio.run(_test())
        expected_keys.append(response.id)
        print(f"response: {response}")
        time.sleep(5)  # wait 5s for logs to land

        import boto3

        s3 = boto3.client("s3")
        bucket_name = "litellm-logs"
        # List objects in the bucket
        response = s3.list_objects(Bucket=bucket_name)

        # Sort the objects based on the LastModified timestamp
        objects = sorted(
            response["Contents"], key=lambda x: x["LastModified"], reverse=True
        )
        # Get the keys of the most recent objects
        most_recent_keys = [obj["Key"] for obj in objects]
        print(most_recent_keys)
        # for each key, get the part before "-" as the key. Do it safely
        cleaned_keys = []
        for key in most_recent_keys:
            split_key = key.split("_")
            if len(split_key) < 2:
                continue
            cleaned_keys.append(split_key[1])
        print("\n most recent keys", most_recent_keys)
        print("\n cleaned keys", cleaned_keys)
        print("\n Expected keys: ", expected_keys)
        matches = 0
        for key in expected_keys:
            key += ".json"
            assert key in cleaned_keys

            if key in cleaned_keys:
                matches += 1
                # remove the match key
                cleaned_keys.remove(key)
        # this asserts we log, the first request + the 2nd cached request
        print("we had two matches ! passed ", matches)
        assert matches == 2
        try:
            # cleanup s3 bucket in test
            for key in most_recent_keys:
                s3.delete_object(Bucket=bucket_name, Key=key)
        except:
            # don't let cleanup fail a test
            pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
    finally:
        # post, close log file and verify
        # Reset stdout to the original value
        print("Passed! Testing async s3 logging")


# test_s3_logging()


def test_s3_logging_async():
    # this tests time added to make s3 logging calls, vs just acompletion calls
    try:
        litellm.set_verbose = True
        # Make 5 calls with an empty success_callback
        litellm.success_callback = []
        start_time_empty_callback = asyncio.run(make_async_calls())
        print("done with no callback test")

        print("starting s3 logging load test")
        # Make 5 calls with success_callback set to "langfuse"
        litellm.success_callback = ["s3"]
        litellm.s3_callback_params = {
            "s3_bucket_name": "litellm-logs",
            "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
            "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
        }
        start_time_s3 = asyncio.run(make_async_calls())
        print("done with s3 test")

        # Compare the time for both scenarios
        print(f"Time taken with success_callback='s3': {start_time_s3}")
        print(f"Time taken with empty success_callback: {start_time_empty_callback}")

        # assert the diff is not more than 1 second
        assert abs(start_time_s3 - start_time_empty_callback) < 1

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


async def make_async_calls():
    tasks = []
    for _ in range(5):
        task = asyncio.create_task(
            litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=[{"role": "user", "content": "This is a test"}],
                max_tokens=5,
                temperature=0.7,
                timeout=5,
                user="langfuse_latency_test_user",
                mock_response="It's simple to use and easy to get started",
            )
        )
        tasks.append(task)

    # Measure the start time before running the tasks
    start_time = asyncio.get_event_loop().time()

    # Wait for all tasks to complete
    responses = await asyncio.gather(*tasks)

    # Print the responses when tasks return
    for idx, response in enumerate(responses):
        print(f"Response from Task {idx + 1}: {response}")

    # Calculate the total time taken
    total_time = asyncio.get_event_loop().time() - start_time

    return total_time


def test_s3_logging_r2():
    # all s3 requests need to be in one test function
    # since we are modifying stdout, and pytests runs tests in parallel
    # on circle ci - we only test litellm.acompletion()
    try:
        # redirect stdout to log_file
        # litellm.cache = litellm.Cache(
        #     type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
        # )
        litellm.set_verbose = True
        from litellm._logging import verbose_logger
        import logging

        verbose_logger.setLevel(level=logging.DEBUG)

        litellm.success_callback = ["s3"]
        litellm.s3_callback_params = {
            "s3_bucket_name": "litellm-r2-bucket",
            "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
            "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
            "s3_endpoint_url": "os.environ/R2_S3_URL",
            "s3_region_name": "os.environ/R2_S3_REGION_NAME",
        }
        print("Testing async s3 logging")

        expected_keys = []

        import time

        curr_time = str(time.time())

        async def _test():
            return await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
                max_tokens=10,
                temperature=0.7,
                user="ishaan-2",
            )

        response = asyncio.run(_test())
        print(f"response: {response}")
        expected_keys.append(response.id)

        import boto3

        s3 = boto3.client(
            "s3",
            endpoint_url=os.getenv("R2_S3_URL"),
            region_name=os.getenv("R2_S3_REGION_NAME"),
            aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
            aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
        )

        bucket_name = "litellm-r2-bucket"
        # List objects in the bucket
        response = s3.list_objects(Bucket=bucket_name)

    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
    finally:
        # post, close log file and verify
        # Reset stdout to the original value
        print("Passed! Testing async s3 logging")
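Outside the test harness, the disabled s3 tests amount to configuring litellm's s3 success-callback before calling acompletion. A minimal sketch of that flow, lifted from the module above (the bucket name and the "os.environ/..." indirection are the test's own values and are not verified against newer litellm releases):

import asyncio

import litellm

litellm.success_callback = ["s3"]  # write a log object to s3 for every successful call
litellm.s3_callback_params = {
    "s3_bucket_name": "litellm-logs",
    "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
    "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}


async def main():
    # the skipped test asserted that an object named after the response id
    # (ending in .json) shows up in the bucket shortly after this call
    return await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "This is a test"}],
        max_tokens=10,
    )


asyncio.run(main())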
@@ -203,6 +203,7 @@ async def test_hf_completion_tgi():
# test_get_cloudflare_response_streaming()


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
    # litellm.set_verbose=True
@@ -1,258 +1,258 @@
(This bedrock test module is likewise disabled: each of its 258 lines is re-emitted behind a leading "# ", leaving the line count unchanged. The original contents were:)

import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError

# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


@pytest.fixture(autouse=True)
def reset_callbacks():
    print("\npytest fixture - resetting callbacks")
    litellm.success_callback = []
    litellm._async_success_callback = []
    litellm.failure_callback = []
    litellm.callbacks = []


def test_completion_bedrock_claude_completion_auth():
    print("calling bedrock claude completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)

        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude_completion_auth()


def test_completion_bedrock_claude_2_1_completion_auth():
    print("calling bedrock claude 2.1 completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)
    try:
        response = completion(
            model="bedrock/anthropic.claude-v2:1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)

        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude_2_1_completion_auth()


def test_completion_bedrock_claude_external_client_auth():
    print("\ncalling bedrock claude external client auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)

    try:
        import boto3

        litellm.set_verbose = True

        bedrock = boto3.client(
            service_name="bedrock-runtime",
            region_name=aws_region_name,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
        )

        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_bedrock_client=bedrock,
        )
        # Add any assertions here to check the response
        print(response)

        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude_external_client_auth()


@pytest.mark.skip(reason="Expired token, need to renew")
def test_completion_bedrock_claude_sts_client_auth():
    print("\ncalling bedrock claude external client auth")
    import os

    aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]
    aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]

    try:
        import boto3

        litellm.set_verbose = True

        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_region_name=aws_region_name,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )

        response = embedding(
            model="cohere.embed-multilingual-v3",
            input=["hello world"],
            aws_region_name="us-east-1",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )

        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            aws_region_name="us-east-1",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude_sts_client_auth()


def test_provisioned_throughput():
    try:
        litellm.set_verbose = True
        import botocore, json, io
        import botocore.session
        from botocore.stub import Stubber

        bedrock_client = botocore.session.get_session().create_client(
            "bedrock-runtime", region_name="us-east-1"
        )

        expected_params = {
            "accept": "application/json",
            "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
            '"max_tokens_to_sample": 256}',
            "contentType": "application/json",
            "modelId": "provisioned-model-arn",
        }
        response_from_bedrock = {
            "body": io.StringIO(
                json.dumps(
                    {
                        "completion": " Here is a short poem about the sky:",
                        "stop_reason": "max_tokens",
                        "stop": None,
                    }
                )
            ),
            "contentType": "contentType",
            "ResponseMetadata": {"HTTPStatusCode": 200},
        }

        with Stubber(bedrock_client) as stubber:
            stubber.add_response(
                "invoke_model",
                service_response=response_from_bedrock,
                expected_params=expected_params,
            )
            response = litellm.completion(
                model="bedrock/anthropic.claude-instant-v1",
                model_id="provisioned-model-arn",
                messages=[{"content": "Hello, how are you?", "role": "user"}],
                aws_bedrock_client=bedrock_client,
            )
            print("response stubbed", response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_provisioned_throughput()
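Notably, the commented-out test_provisioned_throughput never reached AWS at all: it stubbed the Bedrock client with botocore's Stubber and handed the stubbed client to litellm. A minimal sketch of that pattern, condensed from the module above (the canned body and the provisioned model id are the test's own values):

import io
import json

import botocore.session
from botocore.stub import Stubber

import litellm

bedrock_client = botocore.session.get_session().create_client(
    "bedrock-runtime", region_name="us-east-1"
)

# canned payload the stub returns instead of calling AWS
response_from_bedrock = {
    "body": io.StringIO(
        json.dumps(
            {
                "completion": " Here is a short poem about the sky:",
                "stop_reason": "max_tokens",
                "stop": None,
            }
        )
    ),
    "contentType": "contentType",
    "ResponseMetadata": {"HTTPStatusCode": 200},
}

with Stubber(bedrock_client) as stubber:
    stubber.add_response("invoke_model", service_response=response_from_bedrock)
    response = litellm.completion(
        model="bedrock/anthropic.claude-instant-v1",
        model_id="provisioned-model-arn",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        aws_bedrock_client=bedrock_client,
    )
    print("response stubbed", response)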
@@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock():
    import asyncio

@@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
# test_customprompt_together_ai()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
    try:
        litellm.set_verbose = True
@@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
# test_completion_sagemaker()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
    try:
        litellm.set_verbose = False
@@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
        pytest.fail(f"Error occurred: {e}")


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
# test_completion_chat_sagemaker()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker_mistral():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response():
    try:
        response = completion(
@@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
        pytest.fail(f"An error occurred - {str(e)}")


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan():
    try:
        response = completion(
@@ -1568,6 +1574,7 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere():
    print("calling bedrock cohere")
    litellm.set_verbose = True
@@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
    assert cost == 0.019922944


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing():
    """
    - get pricing specific to region for a model
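The skipped pricing test builds on litellm's completion_cost helper, which the same test modules import. A minimal sketch of the underlying call (running it needs live AWS credentials, and the completion_response keyword reflects the litellm version these tests pin rather than any current release):

from litellm import completion, completion_cost

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"content": "Write a short poem about the sky", "role": "user"}],
    max_tokens=10,
)
cost = completion_cost(completion_response=response)
print(f"cost: {cost}")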
@@ -478,6 +478,7 @@ async def test_async_chat_azure_stream():


## Test Bedrock + sync
+@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream():
    try:
        customHandler = CompletionCustomHandler()
@@ -518,6 +519,7 @@ def test_chat_bedrock_stream():


## Test Bedrock + Async
+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_bedrock_stream():
    try:
@@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():


## Test Sagemaker + Async
+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_sagemaker_stream():
    try:
@@ -793,6 +796,7 @@ async def test_async_embedding_azure():


## Test Bedrock + Async
+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_embedding_bedrock():
    try:
@@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
# asyncio.run(test_async_custom_handler_embedding_optional_param())


+@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock():
    """
@@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
        pytest.fail(f"Error occurred: {e}")


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan():
    try:
        # this tests if we support str input for bedrock embedding
@@ -422,6 +423,7 @@ def test_aembedding_azure():
# test_aembedding_azure()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embeddings():
    try:
        response = litellm.embedding(
@@ -438,6 +440,7 @@ def test_sagemaker_embeddings():
        pytest.fail(f"Error occurred: {e}")


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_aembeddings():
    try:
@@ -42,6 +42,7 @@ exception_models = [


# Test 1: Context Window Errors
+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
def test_context_window(model):
    print("Testing context window error")
@@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
        os.environ["AI21_API_KEY"] = "bad-key"
    elif "togethercomputer" in model:
        temporary_key = os.environ["TOGETHERAI_API_KEY"]
-        os.environ[
-            "TOGETHERAI_API_KEY"
-        ] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
+        os.environ["TOGETHERAI_API_KEY"] = (
+            "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
+        )
    elif model in litellm.openrouter_models:
        temporary_key = os.environ["OPENROUTER_API_KEY"]
        os.environ["OPENROUTER_API_KEY"] = "bad-key"
@@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
# asyncio.run(test_azure_img_gen_health_check())


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_embedding_health_check():
    response = await litellm.ahealth_check(
@@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
        pytest.fail(f"An exception occurred - {str(e)}")


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock():
    try:
        litellm.set_verbose = True
@@ -141,6 +142,7 @@ def test_image_generation_bedrock():
        pytest.fail(f"An exception occurred - {str(e)}")


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params():
    try:
File diff suppressed because it is too large
@@ -12,6 +12,7 @@ import litellm
from litellm import completion


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
    litellm.set_verbose = True
    litellm.drop_params = True
@@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
# Sagemaker


+@pytest.mark.skip(reason="AWS Suspended Account")
def sagemaker_test_completion():
    litellm.SagemakerConfig(max_new_tokens=10)
    # litellm.set_verbose=True
@@ -514,6 +515,7 @@ def sagemaker_test_completion():
# Bedrock


+@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion():
    litellm.AmazonCohereConfig(max_tokens=10)
    # litellm.set_verbose=True
@@ -166,14 +166,6 @@ def test_call_one_endpoint():
            "tpm": 240000,
            "rpm": 1800,
        },
-        {
-            "model_name": "claude-v1",
-            "litellm_params": {
-                "model": "bedrock/anthropic.claude-instant-v1",
-            },
-            "tpm": 100000,
-            "rpm": 10000,
-        },
        {
            "model_name": "text-embedding-ada-002",
            "litellm_params": {
@@ -202,15 +194,6 @@ def test_call_one_endpoint():
        )
        print("\n response", response)

-    async def call_bedrock_claude():
-        response = await router.acompletion(
-            model="bedrock/anthropic.claude-instant-v1",
-            messages=[{"role": "user", "content": "hello this request will pass"}],
-            specific_deployment=True,
-        )
-
-        print("\n response", response)
-
    async def call_azure_embedding():
        response = await router.aembedding(
            model="azure/azure-embedding-model",
@@ -221,7 +204,6 @@ def test_call_one_endpoint():
        print("\n response", response)

    asyncio.run(call_azure_completion())
-    asyncio.run(call_bedrock_claude())
    asyncio.run(call_azure_embedding())

    os.environ["AZURE_API_BASE"] = old_api_base
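For context, the deployment and helper removed in the three hunks above followed litellm's Router pattern. A minimal sketch of what was dropped (the Router construction here is abridged relative to the full test fixture):

import asyncio

import litellm

router = litellm.Router(
    model_list=[
        {
            "model_name": "claude-v1",
            "litellm_params": {"model": "bedrock/anthropic.claude-instant-v1"},
            "tpm": 100000,
            "rpm": 10000,
        }
    ]
)


async def call_bedrock_claude():
    # specific_deployment=True targets this exact deployment by its litellm model name
    return await router.acompletion(
        model="bedrock/anthropic.claude-instant-v1",
        messages=[{"role": "user", "content": "hello this request will pass"}],
        specific_deployment=True,
    )


# requires working AWS credentials, which is why the call was removed:
# asyncio.run(call_bedrock_claude())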
@@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router():
    litellm.set_verbose = True
    print("\n Testing bedrock on router\n")
@@ -87,6 +87,7 @@ def test_router_timeouts():
    print("********** TOKENS USED SO FAR = ", total_tokens_used)


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
    import openai
@@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream():
    try:
        litellm.set_verbose = False
@@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream()


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream():
    try:
        litellm.set_verbose = False
@@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
# test_sagemaker_weird_response()


+@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_streaming_async():
    try:
@@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
# asyncio.run(test_sagemaker_streaming_async())


+@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
    try:
        response = completion(