diff --git a/litellm/tests/test_amazing_s3_logs.py b/litellm/tests/test_amazing_s3_logs.py index 7b8eb4a47d..74d6eb5b94 100644 --- a/litellm/tests/test_amazing_s3_logs.py +++ b/litellm/tests/test_amazing_s3_logs.py @@ -1,253 +1,254 @@ -import sys -import os -import io, asyncio +## @pytest.mark.skip(reason="AWS Suspended Account") +# import sys +# import os +# import io, asyncio -# import logging -# logging.basicConfig(level=logging.DEBUG) -sys.path.insert(0, os.path.abspath("../..")) +# # import logging +# # logging.basicConfig(level=logging.DEBUG) +# sys.path.insert(0, os.path.abspath("../..")) -from litellm import completion -import litellm +# from litellm import completion +# import litellm -litellm.num_retries = 3 +# litellm.num_retries = 3 -import time, random -import pytest +# import time, random +# import pytest -def test_s3_logging(): - # all s3 requests need to be in one test function - # since we are modifying stdout, and pytests runs tests in parallel - # on circle ci - we only test litellm.acompletion() - try: - # redirect stdout to log_file - litellm.cache = litellm.Cache( - type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" - ) +# def test_s3_logging(): +# # all s3 requests need to be in one test function +# # since we are modifying stdout, and pytests runs tests in parallel +# # on circle ci - we only test litellm.acompletion() +# try: +# # redirect stdout to log_file +# litellm.cache = litellm.Cache( +# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" +# ) - litellm.success_callback = ["s3"] - litellm.s3_callback_params = { - "s3_bucket_name": "litellm-logs", - "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", - "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", - } - litellm.set_verbose = True +# litellm.success_callback = ["s3"] +# litellm.s3_callback_params = { +# "s3_bucket_name": "litellm-logs", +# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", +# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", +# } +# litellm.set_verbose = True - print("Testing async s3 logging") +# print("Testing async s3 logging") - expected_keys = [] +# expected_keys = [] - import time +# import time - curr_time = str(time.time()) +# curr_time = str(time.time()) - async def _test(): - return await litellm.acompletion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": f"This is a test {curr_time}"}], - max_tokens=10, - temperature=0.7, - user="ishaan-2", - ) +# async def _test(): +# return await litellm.acompletion( +# model="gpt-3.5-turbo", +# messages=[{"role": "user", "content": f"This is a test {curr_time}"}], +# max_tokens=10, +# temperature=0.7, +# user="ishaan-2", +# ) - response = asyncio.run(_test()) - print(f"response: {response}") - expected_keys.append(response.id) +# response = asyncio.run(_test()) +# print(f"response: {response}") +# expected_keys.append(response.id) - async def _test(): - return await litellm.acompletion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": f"This is a test {curr_time}"}], - max_tokens=10, - temperature=0.7, - user="ishaan-2", - ) +# async def _test(): +# return await litellm.acompletion( +# model="gpt-3.5-turbo", +# messages=[{"role": "user", "content": f"This is a test {curr_time}"}], +# max_tokens=10, +# temperature=0.7, +# user="ishaan-2", +# ) - response = asyncio.run(_test()) - expected_keys.append(response.id) - print(f"response: {response}") - time.sleep(5) # wait 5s for logs to land +# response = asyncio.run(_test()) +# 
expected_keys.append(response.id) +# print(f"response: {response}") +# time.sleep(5) # wait 5s for logs to land - import boto3 +# import boto3 - s3 = boto3.client("s3") - bucket_name = "litellm-logs" - # List objects in the bucket - response = s3.list_objects(Bucket=bucket_name) +# s3 = boto3.client("s3") +# bucket_name = "litellm-logs" +# # List objects in the bucket +# response = s3.list_objects(Bucket=bucket_name) - # Sort the objects based on the LastModified timestamp - objects = sorted( - response["Contents"], key=lambda x: x["LastModified"], reverse=True - ) - # Get the keys of the most recent objects - most_recent_keys = [obj["Key"] for obj in objects] - print(most_recent_keys) - # for each key, get the part before "-" as the key. Do it safely - cleaned_keys = [] - for key in most_recent_keys: - split_key = key.split("_") - if len(split_key) < 2: - continue - cleaned_keys.append(split_key[1]) - print("\n most recent keys", most_recent_keys) - print("\n cleaned keys", cleaned_keys) - print("\n Expected keys: ", expected_keys) - matches = 0 - for key in expected_keys: - key += ".json" - assert key in cleaned_keys +# # Sort the objects based on the LastModified timestamp +# objects = sorted( +# response["Contents"], key=lambda x: x["LastModified"], reverse=True +# ) +# # Get the keys of the most recent objects +# most_recent_keys = [obj["Key"] for obj in objects] +# print(most_recent_keys) +# # for each key, get the part before "-" as the key. Do it safely +# cleaned_keys = [] +# for key in most_recent_keys: +# split_key = key.split("_") +# if len(split_key) < 2: +# continue +# cleaned_keys.append(split_key[1]) +# print("\n most recent keys", most_recent_keys) +# print("\n cleaned keys", cleaned_keys) +# print("\n Expected keys: ", expected_keys) +# matches = 0 +# for key in expected_keys: +# key += ".json" +# assert key in cleaned_keys - if key in cleaned_keys: - matches += 1 - # remove the match key - cleaned_keys.remove(key) - # this asserts we log, the first request + the 2nd cached request - print("we had two matches ! passed ", matches) - assert matches == 2 - try: - # cleanup s3 bucket in test - for key in most_recent_keys: - s3.delete_object(Bucket=bucket_name, Key=key) - except: - # don't let cleanup fail a test - pass - except Exception as e: - pytest.fail(f"An exception occurred - {e}") - finally: - # post, close log file and verify - # Reset stdout to the original value - print("Passed! Testing async s3 logging") +# if key in cleaned_keys: +# matches += 1 +# # remove the match key +# cleaned_keys.remove(key) +# # this asserts we log, the first request + the 2nd cached request +# print("we had two matches ! passed ", matches) +# assert matches == 2 +# try: +# # cleanup s3 bucket in test +# for key in most_recent_keys: +# s3.delete_object(Bucket=bucket_name, Key=key) +# except: +# # don't let cleanup fail a test +# pass +# except Exception as e: +# pytest.fail(f"An exception occurred - {e}") +# finally: +# # post, close log file and verify +# # Reset stdout to the original value +# print("Passed! 
Testing async s3 logging") -# test_s3_logging() +# # test_s3_logging() -def test_s3_logging_async(): - # this tests time added to make s3 logging calls, vs just acompletion calls - try: - litellm.set_verbose = True - # Make 5 calls with an empty success_callback - litellm.success_callback = [] - start_time_empty_callback = asyncio.run(make_async_calls()) - print("done with no callback test") +# def test_s3_logging_async(): +# # this tests time added to make s3 logging calls, vs just acompletion calls +# try: +# litellm.set_verbose = True +# # Make 5 calls with an empty success_callback +# litellm.success_callback = [] +# start_time_empty_callback = asyncio.run(make_async_calls()) +# print("done with no callback test") - print("starting s3 logging load test") - # Make 5 calls with success_callback set to "langfuse" - litellm.success_callback = ["s3"] - litellm.s3_callback_params = { - "s3_bucket_name": "litellm-logs", - "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", - "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", - } - start_time_s3 = asyncio.run(make_async_calls()) - print("done with s3 test") +# print("starting s3 logging load test") +# # Make 5 calls with success_callback set to "langfuse" +# litellm.success_callback = ["s3"] +# litellm.s3_callback_params = { +# "s3_bucket_name": "litellm-logs", +# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY", +# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID", +# } +# start_time_s3 = asyncio.run(make_async_calls()) +# print("done with s3 test") - # Compare the time for both scenarios - print(f"Time taken with success_callback='s3': {start_time_s3}") - print(f"Time taken with empty success_callback: {start_time_empty_callback}") +# # Compare the time for both scenarios +# print(f"Time taken with success_callback='s3': {start_time_s3}") +# print(f"Time taken with empty success_callback: {start_time_empty_callback}") - # assert the diff is not more than 1 second - assert abs(start_time_s3 - start_time_empty_callback) < 1 +# # assert the diff is not more than 1 second +# assert abs(start_time_s3 - start_time_empty_callback) < 1 - except litellm.Timeout as e: - pass - except Exception as e: - pytest.fail(f"An exception occurred - {e}") +# except litellm.Timeout as e: +# pass +# except Exception as e: +# pytest.fail(f"An exception occurred - {e}") -async def make_async_calls(): - tasks = [] - for _ in range(5): - task = asyncio.create_task( - litellm.acompletion( - model="azure/chatgpt-v-2", - messages=[{"role": "user", "content": "This is a test"}], - max_tokens=5, - temperature=0.7, - timeout=5, - user="langfuse_latency_test_user", - mock_response="It's simple to use and easy to get started", - ) - ) - tasks.append(task) +# async def make_async_calls(): +# tasks = [] +# for _ in range(5): +# task = asyncio.create_task( +# litellm.acompletion( +# model="azure/chatgpt-v-2", +# messages=[{"role": "user", "content": "This is a test"}], +# max_tokens=5, +# temperature=0.7, +# timeout=5, +# user="langfuse_latency_test_user", +# mock_response="It's simple to use and easy to get started", +# ) +# ) +# tasks.append(task) - # Measure the start time before running the tasks - start_time = asyncio.get_event_loop().time() +# # Measure the start time before running the tasks +# start_time = asyncio.get_event_loop().time() - # Wait for all tasks to complete - responses = await asyncio.gather(*tasks) +# # Wait for all tasks to complete +# responses = await asyncio.gather(*tasks) - # Print the responses when tasks return - 
for idx, response in enumerate(responses): - print(f"Response from Task {idx + 1}: {response}") +# # Print the responses when tasks return +# for idx, response in enumerate(responses): +# print(f"Response from Task {idx + 1}: {response}") - # Calculate the total time taken - total_time = asyncio.get_event_loop().time() - start_time +# # Calculate the total time taken +# total_time = asyncio.get_event_loop().time() - start_time - return total_time +# return total_time -def test_s3_logging_r2(): - # all s3 requests need to be in one test function - # since we are modifying stdout, and pytests runs tests in parallel - # on circle ci - we only test litellm.acompletion() - try: - # redirect stdout to log_file - # litellm.cache = litellm.Cache( - # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2" - # ) - litellm.set_verbose = True - from litellm._logging import verbose_logger - import logging +# def test_s3_logging_r2(): +# # all s3 requests need to be in one test function +# # since we are modifying stdout, and pytests runs tests in parallel +# # on circle ci - we only test litellm.acompletion() +# try: +# # redirect stdout to log_file +# # litellm.cache = litellm.Cache( +# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2" +# # ) +# litellm.set_verbose = True +# from litellm._logging import verbose_logger +# import logging - verbose_logger.setLevel(level=logging.DEBUG) +# verbose_logger.setLevel(level=logging.DEBUG) - litellm.success_callback = ["s3"] - litellm.s3_callback_params = { - "s3_bucket_name": "litellm-r2-bucket", - "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY", - "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID", - "s3_endpoint_url": "os.environ/R2_S3_URL", - "s3_region_name": "os.environ/R2_S3_REGION_NAME", - } - print("Testing async s3 logging") +# litellm.success_callback = ["s3"] +# litellm.s3_callback_params = { +# "s3_bucket_name": "litellm-r2-bucket", +# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY", +# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID", +# "s3_endpoint_url": "os.environ/R2_S3_URL", +# "s3_region_name": "os.environ/R2_S3_REGION_NAME", +# } +# print("Testing async s3 logging") - expected_keys = [] +# expected_keys = [] - import time +# import time - curr_time = str(time.time()) +# curr_time = str(time.time()) - async def _test(): - return await litellm.acompletion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": f"This is a test {curr_time}"}], - max_tokens=10, - temperature=0.7, - user="ishaan-2", - ) +# async def _test(): +# return await litellm.acompletion( +# model="gpt-3.5-turbo", +# messages=[{"role": "user", "content": f"This is a test {curr_time}"}], +# max_tokens=10, +# temperature=0.7, +# user="ishaan-2", +# ) - response = asyncio.run(_test()) - print(f"response: {response}") - expected_keys.append(response.id) +# response = asyncio.run(_test()) +# print(f"response: {response}") +# expected_keys.append(response.id) - import boto3 +# import boto3 - s3 = boto3.client( - "s3", - endpoint_url=os.getenv("R2_S3_URL"), - region_name=os.getenv("R2_S3_REGION_NAME"), - aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"), - aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"), - ) +# s3 = boto3.client( +# "s3", +# endpoint_url=os.getenv("R2_S3_URL"), +# region_name=os.getenv("R2_S3_REGION_NAME"), +# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"), +# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"), +# ) - bucket_name = "litellm-r2-bucket" - # List objects in the bucket 
- response = s3.list_objects(Bucket=bucket_name) +# bucket_name = "litellm-r2-bucket" +# # List objects in the bucket +# response = s3.list_objects(Bucket=bucket_name) - except Exception as e: - pytest.fail(f"An exception occurred - {e}") - finally: - # post, close log file and verify - # Reset stdout to the original value - print("Passed! Testing async s3 logging") +# except Exception as e: +# pytest.fail(f"An exception occurred - {e}") +# finally: +# # post, close log file and verify +# # Reset stdout to the original value +# print("Passed! Testing async s3 logging") diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py index 86cbfafbf1..d6ff9aa872 100644 --- a/litellm/tests/test_async_fn.py +++ b/litellm/tests/test_async_fn.py @@ -203,6 +203,7 @@ async def test_hf_completion_tgi(): # test_get_cloudflare_response_streaming() +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_completion_sagemaker(): # litellm.set_verbose=True diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index 7df1fcc485..6b31c8a061 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -1,258 +1,258 @@ -import sys, os -import traceback -from dotenv import load_dotenv +# import sys, os +# import traceback +# from dotenv import load_dotenv -load_dotenv() -import os, io +# load_dotenv() +# import os, io -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import pytest -import litellm -from litellm import embedding, completion, completion_cost, Timeout -from litellm import RateLimitError +# sys.path.insert( +# 0, os.path.abspath("../..") +# ) # Adds the parent directory to the system path +# import pytest +# import litellm +# from litellm import embedding, completion, completion_cost, Timeout +# from litellm import RateLimitError -# litellm.num_retries = 3 -litellm.cache = None -litellm.success_callback = [] -user_message = "Write a short poem about the sky" -messages = [{"content": user_message, "role": "user"}] +# # litellm.num_retries = 3 +# litellm.cache = None +# litellm.success_callback = [] +# user_message = "Write a short poem about the sky" +# messages = [{"content": user_message, "role": "user"}] -@pytest.fixture(autouse=True) -def reset_callbacks(): - print("\npytest fixture - resetting callbacks") - litellm.success_callback = [] - litellm._async_success_callback = [] - litellm.failure_callback = [] - litellm.callbacks = [] +# @pytest.fixture(autouse=True) +# def reset_callbacks(): +# print("\npytest fixture - resetting callbacks") +# litellm.success_callback = [] +# litellm._async_success_callback = [] +# litellm.failure_callback = [] +# litellm.callbacks = [] -def test_completion_bedrock_claude_completion_auth(): - print("calling bedrock claude completion params auth") - import os +# def test_completion_bedrock_claude_completion_auth(): +# print("calling bedrock claude completion params auth") +# import os - aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] - aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] - aws_region_name = os.environ["AWS_REGION_NAME"] +# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] +# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] +# aws_region_name = os.environ["AWS_REGION_NAME"] - os.environ.pop("AWS_ACCESS_KEY_ID", None) - os.environ.pop("AWS_SECRET_ACCESS_KEY", None) - os.environ.pop("AWS_REGION_NAME", None) +# os.environ.pop("AWS_ACCESS_KEY_ID", None) 
+# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) +# os.environ.pop("AWS_REGION_NAME", None) - try: - response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, - ) - # Add any assertions here to check the response - print(response) +# try: +# response = completion( +# model="bedrock/anthropic.claude-instant-v1", +# messages=messages, +# max_tokens=10, +# temperature=0.1, +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# aws_region_name=aws_region_name, +# ) +# # Add any assertions here to check the response +# print(response) - os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id - os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key - os.environ["AWS_REGION_NAME"] = aws_region_name - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") +# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id +# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key +# os.environ["AWS_REGION_NAME"] = aws_region_name +# except RateLimitError: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") -# test_completion_bedrock_claude_completion_auth() +# # test_completion_bedrock_claude_completion_auth() -def test_completion_bedrock_claude_2_1_completion_auth(): - print("calling bedrock claude 2.1 completion params auth") - import os +# def test_completion_bedrock_claude_2_1_completion_auth(): +# print("calling bedrock claude 2.1 completion params auth") +# import os - aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] - aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] - aws_region_name = os.environ["AWS_REGION_NAME"] +# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] +# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] +# aws_region_name = os.environ["AWS_REGION_NAME"] - os.environ.pop("AWS_ACCESS_KEY_ID", None) - os.environ.pop("AWS_SECRET_ACCESS_KEY", None) - os.environ.pop("AWS_REGION_NAME", None) - try: - response = completion( - model="bedrock/anthropic.claude-v2:1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, - ) - # Add any assertions here to check the response - print(response) +# os.environ.pop("AWS_ACCESS_KEY_ID", None) +# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) +# os.environ.pop("AWS_REGION_NAME", None) +# try: +# response = completion( +# model="bedrock/anthropic.claude-v2:1", +# messages=messages, +# max_tokens=10, +# temperature=0.1, +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# aws_region_name=aws_region_name, +# ) +# # Add any assertions here to check the response +# print(response) - os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id - os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key - os.environ["AWS_REGION_NAME"] = aws_region_name - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") +# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id +# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key +# os.environ["AWS_REGION_NAME"] = aws_region_name +# except RateLimitError: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") -# test_completion_bedrock_claude_2_1_completion_auth() +# # 
test_completion_bedrock_claude_2_1_completion_auth() -def test_completion_bedrock_claude_external_client_auth(): - print("\ncalling bedrock claude external client auth") - import os +# def test_completion_bedrock_claude_external_client_auth(): +# print("\ncalling bedrock claude external client auth") +# import os - aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] - aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] - aws_region_name = os.environ["AWS_REGION_NAME"] +# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"] +# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"] +# aws_region_name = os.environ["AWS_REGION_NAME"] - os.environ.pop("AWS_ACCESS_KEY_ID", None) - os.environ.pop("AWS_SECRET_ACCESS_KEY", None) - os.environ.pop("AWS_REGION_NAME", None) +# os.environ.pop("AWS_ACCESS_KEY_ID", None) +# os.environ.pop("AWS_SECRET_ACCESS_KEY", None) +# os.environ.pop("AWS_REGION_NAME", None) - try: - import boto3 +# try: +# import boto3 - litellm.set_verbose = True +# litellm.set_verbose = True - bedrock = boto3.client( - service_name="bedrock-runtime", - region_name=aws_region_name, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com", - ) +# bedrock = boto3.client( +# service_name="bedrock-runtime", +# region_name=aws_region_name, +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com", +# ) - response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_bedrock_client=bedrock, - ) - # Add any assertions here to check the response - print(response) +# response = completion( +# model="bedrock/anthropic.claude-instant-v1", +# messages=messages, +# max_tokens=10, +# temperature=0.1, +# aws_bedrock_client=bedrock, +# ) +# # Add any assertions here to check the response +# print(response) - os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id - os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key - os.environ["AWS_REGION_NAME"] = aws_region_name - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") +# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id +# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key +# os.environ["AWS_REGION_NAME"] = aws_region_name +# except RateLimitError: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") -# test_completion_bedrock_claude_external_client_auth() +# # test_completion_bedrock_claude_external_client_auth() -@pytest.mark.skip(reason="Expired token, need to renew") -def test_completion_bedrock_claude_sts_client_auth(): - print("\ncalling bedrock claude external client auth") - import os +# @pytest.mark.skip(reason="Expired token, need to renew") +# def test_completion_bedrock_claude_sts_client_auth(): +# print("\ncalling bedrock claude external client auth") +# import os - aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"] - aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"] - aws_region_name = os.environ["AWS_REGION_NAME"] - aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] +# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"] +# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"] +# aws_region_name = os.environ["AWS_REGION_NAME"] +# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] - try: - import boto3 +# try: +# import boto3 - 
litellm.set_verbose = True +# litellm.set_verbose = True - response = completion( - model="bedrock/anthropic.claude-instant-v1", - messages=messages, - max_tokens=10, - temperature=0.1, - aws_region_name=aws_region_name, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_role_name=aws_role_name, - aws_session_name="my-test-session", - ) +# response = completion( +# model="bedrock/anthropic.claude-instant-v1", +# messages=messages, +# max_tokens=10, +# temperature=0.1, +# aws_region_name=aws_region_name, +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# aws_role_name=aws_role_name, +# aws_session_name="my-test-session", +# ) - response = embedding( - model="cohere.embed-multilingual-v3", - input=["hello world"], - aws_region_name="us-east-1", - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_role_name=aws_role_name, - aws_session_name="my-test-session", - ) +# response = embedding( +# model="cohere.embed-multilingual-v3", +# input=["hello world"], +# aws_region_name="us-east-1", +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# aws_role_name=aws_role_name, +# aws_session_name="my-test-session", +# ) - response = completion( - model="gpt-3.5-turbo", - messages=messages, - aws_region_name="us-east-1", - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_role_name=aws_role_name, - aws_session_name="my-test-session", - ) - # Add any assertions here to check the response - print(response) - except RateLimitError: - pass - except Exception as e: - pytest.fail(f"Error occurred: {e}") +# response = completion( +# model="gpt-3.5-turbo", +# messages=messages, +# aws_region_name="us-east-1", +# aws_access_key_id=aws_access_key_id, +# aws_secret_access_key=aws_secret_access_key, +# aws_role_name=aws_role_name, +# aws_session_name="my-test-session", +# ) +# # Add any assertions here to check the response +# print(response) +# except RateLimitError: +# pass +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") -# test_completion_bedrock_claude_sts_client_auth() +# # test_completion_bedrock_claude_sts_client_auth() -def test_provisioned_throughput(): - try: - litellm.set_verbose = True - import botocore, json, io - import botocore.session - from botocore.stub import Stubber +# def test_provisioned_throughput(): +# try: +# litellm.set_verbose = True +# import botocore, json, io +# import botocore.session +# from botocore.stub import Stubber - bedrock_client = botocore.session.get_session().create_client( - "bedrock-runtime", region_name="us-east-1" - ) +# bedrock_client = botocore.session.get_session().create_client( +# "bedrock-runtime", region_name="us-east-1" +# ) - expected_params = { - "accept": "application/json", - "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", ' - '"max_tokens_to_sample": 256}', - "contentType": "application/json", - "modelId": "provisioned-model-arn", - } - response_from_bedrock = { - "body": io.StringIO( - json.dumps( - { - "completion": " Here is a short poem about the sky:", - "stop_reason": "max_tokens", - "stop": None, - } - ) - ), - "contentType": "contentType", - "ResponseMetadata": {"HTTPStatusCode": 200}, - } +# expected_params = { +# "accept": "application/json", +# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", ' +# '"max_tokens_to_sample": 256}', +# "contentType": "application/json", +# "modelId": 
"provisioned-model-arn", +# } +# response_from_bedrock = { +# "body": io.StringIO( +# json.dumps( +# { +# "completion": " Here is a short poem about the sky:", +# "stop_reason": "max_tokens", +# "stop": None, +# } +# ) +# ), +# "contentType": "contentType", +# "ResponseMetadata": {"HTTPStatusCode": 200}, +# } - with Stubber(bedrock_client) as stubber: - stubber.add_response( - "invoke_model", - service_response=response_from_bedrock, - expected_params=expected_params, - ) - response = litellm.completion( - model="bedrock/anthropic.claude-instant-v1", - model_id="provisioned-model-arn", - messages=[{"content": "Hello, how are you?", "role": "user"}], - aws_bedrock_client=bedrock_client, - ) - print("response stubbed", response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") +# with Stubber(bedrock_client) as stubber: +# stubber.add_response( +# "invoke_model", +# service_response=response_from_bedrock, +# expected_params=expected_params, +# ) +# response = litellm.completion( +# model="bedrock/anthropic.claude-instant-v1", +# model_id="provisioned-model-arn", +# messages=[{"content": "Hello, how are you?", "role": "user"}], +# aws_bedrock_client=bedrock_client, +# ) +# print("response stubbed", response) +# except Exception as e: +# pytest.fail(f"Error occurred: {e}") -# test_provisioned_throughput() +# # test_provisioned_throughput() diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py index de9740ebdf..1764b65c04 100644 --- a/litellm/tests/test_caching.py +++ b/litellm/tests/test_caching.py @@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream(): # test_redis_cache_acompletion_stream() +@pytest.mark.skip(reason="AWS Suspended Account") def test_redis_cache_acompletion_stream_bedrock(): import asyncio diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 71f77c9b49..f502e5f038 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1404,6 +1404,7 @@ def test_customprompt_together_ai(): # test_customprompt_together_ai() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_sagemaker(): try: litellm.set_verbose = True @@ -1429,6 +1430,7 @@ def test_completion_sagemaker(): # test_completion_sagemaker() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_sagemaker_stream(): try: litellm.set_verbose = False @@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream(): pytest.fail(f"Error occurred: {e}") +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_chat_sagemaker(): try: messages = [{"role": "user", "content": "Hey, how's it going?"}] @@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker(): # test_completion_chat_sagemaker() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_chat_sagemaker_mistral(): try: messages = [{"role": "user", "content": "Hey, how's it going?"}] @@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral(): # test_completion_chat_sagemaker_mistral() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_titan_null_response(): try: response = completion( @@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response(): pytest.fail(f"An error occurred - {str(e)}") +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_titan(): try: response = completion( @@ -1568,6 +1574,7 @@ def test_completion_bedrock_claude(): # test_completion_bedrock_claude() +@pytest.mark.skip(reason="AWS Suspended Account") def 
test_completion_bedrock_cohere(): print("calling bedrock cohere") litellm.set_verbose = True diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index bb460b76bd..b82cfd0a62 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -171,6 +171,7 @@ def test_cost_openai_image_gen(): assert cost == 0.019922944 +@pytest.mark.skip(reason="AWS Suspended Account") def test_cost_bedrock_pricing(): """ - get pricing specific to region for a model diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index 6e55cc5a1e..683173b21e 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -478,6 +478,7 @@ async def test_async_chat_azure_stream(): ## Test Bedrock + sync +@pytest.mark.skip(reason="AWS Suspended Account") def test_chat_bedrock_stream(): try: customHandler = CompletionCustomHandler() @@ -518,6 +519,7 @@ def test_chat_bedrock_stream(): ## Test Bedrock + Async +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_async_chat_bedrock_stream(): try: @@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream(): ## Test Sagemaker + Async +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_async_chat_sagemaker_stream(): try: @@ -793,6 +796,7 @@ async def test_async_embedding_azure(): ## Test Bedrock + Async +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_async_embedding_bedrock(): try: diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py index a7b0c937f0..fe13076890 100644 --- a/litellm/tests/test_custom_logger.py +++ b/litellm/tests/test_custom_logger.py @@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param(): # asyncio.run(test_async_custom_handler_embedding_optional_param()) +@pytest.mark.skip(reason="AWS Account suspended. 
Pending their approval") @pytest.mark.asyncio async def test_async_custom_handler_embedding_optional_param_bedrock(): """ diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 4637a79e04..b88f6ae7f4 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -256,6 +256,7 @@ async def test_vertexai_aembedding(): pytest.fail(f"Error occurred: {e}") +@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_embedding_titan(): try: # this tests if we support str input for bedrock embedding @@ -422,6 +423,7 @@ def test_aembedding_azure(): # test_aembedding_azure() +@pytest.mark.skip(reason="AWS Suspended Account") def test_sagemaker_embeddings(): try: response = litellm.embedding( @@ -438,6 +440,7 @@ def test_sagemaker_embeddings(): pytest.fail(f"Error occurred: {e}") +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_sagemaker_aembeddings(): try: diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 4729cabb23..9c90014c0d 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -42,6 +42,7 @@ exception_models = [ # Test 1: Context Window Errors +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.parametrize("model", exception_models) def test_context_window(model): print("Testing context window error") @@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th os.environ["AI21_API_KEY"] = "bad-key" elif "togethercomputer" in model: temporary_key = os.environ["TOGETHERAI_API_KEY"] - os.environ[ - "TOGETHERAI_API_KEY" - ] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a" + os.environ["TOGETHERAI_API_KEY"] = ( + "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a" + ) elif model in litellm.openrouter_models: temporary_key = os.environ["OPENROUTER_API_KEY"] os.environ["OPENROUTER_API_KEY"] = "bad-key" diff --git a/litellm/tests/test_health_check.py b/litellm/tests/test_health_check.py index 21b72d2ac3..f632e76921 100644 --- a/litellm/tests/test_health_check.py +++ b/litellm/tests/test_health_check.py @@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check(): # asyncio.run(test_azure_img_gen_health_check()) +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_sagemaker_embedding_health_check(): response = await litellm.ahealth_check( diff --git a/litellm/tests/test_image_generation.py b/litellm/tests/test_image_generation.py index 59ccaacd8d..0672319a21 100644 --- a/litellm/tests/test_image_generation.py +++ b/litellm/tests/test_image_generation.py @@ -121,6 +121,7 @@ async def test_async_image_generation_azure(): pytest.fail(f"An exception occurred - {str(e)}") +@pytest.mark.skip(reason="AWS Suspended Account") def test_image_generation_bedrock(): try: litellm.set_verbose = True @@ -141,6 +142,7 @@ def test_image_generation_bedrock(): pytest.fail(f"An exception occurred - {str(e)}") +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_aimage_generation_bedrock_with_optional_params(): try: diff --git a/litellm/tests/test_key_generate_dynamodb.py b/litellm/tests/test_key_generate_dynamodb.py index 573bd944db..e28b9f047f 100644 --- a/litellm/tests/test_key_generate_dynamodb.py +++ b/litellm/tests/test_key_generate_dynamodb.py @@ -1,516 +1,517 @@ -# Test the following scenarios: -# 1. Generate a Key, and use it to make a call -# 2. 
Make a call with invalid key, expect it to fail -# 3. Make a call to a key with invalid model - expect to fail -# 4. Make a call to a key with valid model - expect to pass -# 5. Make a call with key over budget, expect to fail -# 6. Make a streaming chat/completions call with key over budget, expect to fail - - -# function to call to generate key - async def new_user(data: NewUserRequest): -# function to validate a request - async def user_auth(request: Request): - -import sys, os -import traceback -from dotenv import load_dotenv -from fastapi import Request - -load_dotenv() -import os, io - -# this file is to test litellm/proxy - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import pytest, logging, asyncio -import litellm, asyncio -from litellm.proxy.proxy_server import ( - new_user, - user_api_key_auth, - user_update, - generate_key_fn, -) - -from litellm.proxy._types import NewUserRequest, DynamoDBArgs, GenerateKeyRequest -from litellm.proxy.utils import DBClient, hash_token -from starlette.datastructures import URL - - -request_data = { - "model": "azure-gpt-3.5", - "messages": [ - {"role": "user", "content": "this is my new test. respond in 50 lines"} - ], -} - - -@pytest.fixture -def custom_db_client(): - # Assuming DBClient is a class that needs to be instantiated - db_args = { - "ssl_verify": False, - "billing_mode": "PAY_PER_REQUEST", - "region_name": "us-west-2", - } - custom_db_client = DBClient( - custom_db_type="dynamo_db", - custom_db_args=db_args, - ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.prisma_client = None - - return custom_db_client - - -def test_generate_and_call_with_valid_key(custom_db_client): - # 1. Generate a Key, and use it to make a call - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - try: - - async def test(): - request = NewUserRequest() - key = await new_user(request) - print(key) - - generated_key = key.key - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) +# @pytest.mark.skip(reason="AWS Suspended Account") +# # Test the following scenarios: +# # 1. Generate a Key, and use it to make a call +# # 2. Make a call with invalid key, expect it to fail +# # 3. Make a call to a key with invalid model - expect to fail +# # 4. Make a call to a key with valid model - expect to pass +# # 5. Make a call with key over budget, expect to fail +# # 6. 
Make a streaming chat/completions call with key over budget, expect to fail + + +# # function to call to generate key - async def new_user(data: NewUserRequest): +# # function to validate a request - async def user_auth(request: Request): + +# import sys, os +# import traceback +# from dotenv import load_dotenv +# from fastapi import Request + +# load_dotenv() +# import os, io + +# # this file is to test litellm/proxy + +# sys.path.insert( +# 0, os.path.abspath("../..") +# ) # Adds the parent directory to the system path +# import pytest, logging, asyncio +# import litellm, asyncio +# from litellm.proxy.proxy_server import ( +# new_user, +# user_api_key_auth, +# user_update, +# generate_key_fn, +# ) + +# from litellm.proxy._types import NewUserRequest, DynamoDBArgs, GenerateKeyRequest +# from litellm.proxy.utils import DBClient, hash_token +# from starlette.datastructures import URL + + +# request_data = { +# "model": "azure-gpt-3.5", +# "messages": [ +# {"role": "user", "content": "this is my new test. respond in 50 lines"} +# ], +# } + + +# @pytest.fixture +# def custom_db_client(): +# # Assuming DBClient is a class that needs to be instantiated +# db_args = { +# "ssl_verify": False, +# "billing_mode": "PAY_PER_REQUEST", +# "region_name": "us-west-2", +# } +# custom_db_client = DBClient( +# custom_db_type="dynamo_db", +# custom_db_args=db_args, +# ) +# # Reset litellm.proxy.proxy_server.prisma_client to None +# litellm.proxy.proxy_server.prisma_client = None + +# return custom_db_client + + +# def test_generate_and_call_with_valid_key(custom_db_client): +# # 1. Generate a Key, and use it to make a call +# setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# try: + +# async def test(): +# request = NewUserRequest() +# key = await new_user(request) +# print(key) + +# generated_key = key.key +# bearer_token = "Bearer " + generated_key + +# request = Request(scope={"type": "http"}) +# request._url = URL(url="/chat/completions") + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# print("result from user auth with new key", result) - asyncio.run(test()) - except Exception as e: - pytest.fail(f"An exception occurred - {str(e)}") - - -def test_call_with_invalid_key(custom_db_client): - # 2. Make a call with invalid key, expect it to fail - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - try: - - async def test(): - generated_key = "bad-key" - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}, receive=None) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - pytest.fail(f"This should have failed!. IT's an invalid key") +# asyncio.run(test()) +# except Exception as e: +# pytest.fail(f"An exception occurred - {str(e)}") + + +# def test_call_with_invalid_key(custom_db_client): +# # 2. 
Make a call with invalid key, expect it to fail +# setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# try: + +# async def test(): +# generated_key = "bad-key" +# bearer_token = "Bearer " + generated_key + +# request = Request(scope={"type": "http"}, receive=None) +# request._url = URL(url="/chat/completions") + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# pytest.fail(f"This should have failed!. IT's an invalid key") - asyncio.run(test()) - except Exception as e: - print("Got Exception", e) - print(e.message) - assert "Authentication Error" in e.message - pass - - -def test_call_with_invalid_model(custom_db_client): - # 3. Make a call to a key with an invalid model - expect to fail - from litellm._logging import verbose_proxy_logger - import logging - - verbose_proxy_logger.setLevel(logging.DEBUG) - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - try: - - async def test(): - request = NewUserRequest(models=["mistral"]) - key = await new_user(request) - print(key) - - generated_key = key.key - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - async def return_body(): - return b'{"model": "gemini-pro-vision"}' - - request.body = return_body - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - pytest.fail(f"This should have failed!. IT's an invalid model") - - asyncio.run(test()) - except Exception as e: - assert ( - e.message - == "Authentication Error, API Key not allowed to access model. This token can only access models=['mistral']. Tried to access gemini-pro-vision" - ) - pass - - -def test_call_with_valid_model(custom_db_client): - # 4. Make a call to a key with a valid model - expect to pass - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - try: - - async def test(): - request = NewUserRequest(models=["mistral"]) - key = await new_user(request) - print(key) - - generated_key = key.key - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - async def return_body(): - return b'{"model": "mistral"}' - - request.body = return_body - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - asyncio.run(test()) - except Exception as e: - pytest.fail(f"An exception occurred - {str(e)}") - - -def test_call_with_user_over_budget(custom_db_client): - # 5. 
Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - from litellm._logging import verbose_proxy_logger, verbose_logger - import logging - - litellm.set_verbose = True - verbose_logger.setLevel(logging.DEBUG) - verbose_proxy_logger.setLevel(logging.DEBUG) - try: - - async def test(): - request = NewUserRequest(max_budget=0.00001) - key = await new_user(request) - print(key) - - generated_key = key.key - user_id = key.user_id - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import ( - _PROXY_track_cost_callback as track_cost_callback, - ) - from litellm import ModelResponse, Choices, Message, Usage - - resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", - choices=[ - Choices( - finish_reason=None, - index=0, - message=Message( - content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a", - role="assistant", - ), - ) - ], - model="gpt-35-turbo", # azure always has model written like this - usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), - ) - await track_cost_callback( - kwargs={ - "stream": False, - "litellm_params": { - "metadata": { - "user_api_key": hash_token(generated_key), - "user_api_key_user_id": user_id, - } - }, - "response_cost": 0.00002, - }, - completion_response=resp, - ) - await asyncio.sleep(5) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. They key crossed it's budget") - - asyncio.run(test()) - except Exception as e: - error_detail = e.message - assert "Authentication Error, ExceededBudget:" in error_detail - print(vars(e)) - - -def test_call_with_user_over_budget_stream(custom_db_client): - # 6. Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - from litellm._logging import verbose_proxy_logger - import logging - - litellm.set_verbose = True - verbose_proxy_logger.setLevel(logging.DEBUG) - try: - - async def test(): - request = NewUserRequest(max_budget=0.00001) - key = await new_user(request) - print(key) - - generated_key = key.key - user_id = key.user_id - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import ( - _PROXY_track_cost_callback as track_cost_callback, - ) - from litellm import ModelResponse, Choices, Message, Usage - - resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", - choices=[ - Choices( - finish_reason=None, - index=0, - message=Message( - content=" Sure! 
Here is a short poem about the sky:\n\nA canvas of blue, a", - role="assistant", - ), - ) - ], - model="gpt-35-turbo", # azure always has model written like this - usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), - ) - await track_cost_callback( - kwargs={ - "stream": True, - "complete_streaming_response": resp, - "litellm_params": { - "metadata": { - "user_api_key": hash_token(generated_key), - "user_api_key_user_id": user_id, - } - }, - "response_cost": 0.00002, - }, - completion_response=ModelResponse(), - ) - await asyncio.sleep(5) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. They key crossed it's budget") - - asyncio.run(test()) - except Exception as e: - error_detail = e.message - assert "Authentication Error, ExceededBudget:" in error_detail - print(vars(e)) - - -def test_call_with_user_key_budget(custom_db_client): - # 7. Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - from litellm._logging import verbose_proxy_logger - import logging - - verbose_proxy_logger.setLevel(logging.DEBUG) - try: - - async def test(): - request = GenerateKeyRequest(max_budget=0.00001) - key = await generate_key_fn(request) - print(key) - - generated_key = key.key - user_id = key.user_id - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import ( - _PROXY_track_cost_callback as track_cost_callback, - ) - from litellm import ModelResponse, Choices, Message, Usage - - resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", - choices=[ - Choices( - finish_reason=None, - index=0, - message=Message( - content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a", - role="assistant", - ), - ) - ], - model="gpt-35-turbo", # azure always has model written like this - usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), - ) - await track_cost_callback( - kwargs={ - "stream": False, - "litellm_params": { - "metadata": { - "user_api_key": hash_token(generated_key), - "user_api_key_user_id": user_id, - } - }, - "response_cost": 0.00002, - }, - completion_response=resp, - ) - await asyncio.sleep(5) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. They key crossed it's budget") - - asyncio.run(test()) - except Exception as e: - error_detail = e.message - assert "Authentication Error, ExceededTokenBudget:" in error_detail - print(vars(e)) - - -def test_call_with_key_over_budget_stream(custom_db_client): - # 8. 
Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - from litellm._logging import verbose_proxy_logger - import logging - - litellm.set_verbose = True - verbose_proxy_logger.setLevel(logging.DEBUG) - try: - - async def test(): - request = GenerateKeyRequest(max_budget=0.00001) - key = await generate_key_fn(request) - print(key) - - generated_key = key.key - user_id = key.user_id - bearer_token = "Bearer " + generated_key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import ( - _PROXY_track_cost_callback as track_cost_callback, - ) - from litellm import ModelResponse, Choices, Message, Usage - - resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", - choices=[ - Choices( - finish_reason=None, - index=0, - message=Message( - content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a", - role="assistant", - ), - ) - ], - model="gpt-35-turbo", # azure always has model written like this - usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), - ) - await track_cost_callback( - kwargs={ - "stream": True, - "complete_streaming_response": resp, - "litellm_params": { - "metadata": { - "user_api_key": hash_token(generated_key), - "user_api_key_user_id": user_id, - } - }, - "response_cost": 0.00002, - }, - completion_response=ModelResponse(), - ) - await asyncio.sleep(5) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. They key crossed it's budget") - - asyncio.run(test()) - except Exception as e: - error_detail = e.message - assert "Authentication Error, ExceededTokenBudget:" in error_detail - print(vars(e)) - - -def test_dynamo_db_migration(custom_db_client): - # Tests the temporary patch we have in place - setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "user_custom_auth", None) - try: - - async def test(): - request = GenerateKeyRequest(max_budget=1) - key = await generate_key_fn(request) - print(key) - - generated_key = key.key - bearer_token = ( - "Bearer " + generated_key - ) # this works with ishaan's db, it's a never expiring key - - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") - - async def return_body(): - return b'{"model": "azure-models"}' - - request.body = return_body - - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - - asyncio.run(test()) - except Exception as e: - pytest.fail(f"An exception occurred - {traceback.format_exc()}") +# asyncio.run(test()) +# except Exception as e: +# print("Got Exception", e) +# print(e.message) +# assert "Authentication Error" in e.message +# pass + + +# def test_call_with_invalid_model(custom_db_client): +# # 3. 
Make a call to a key with an invalid model - expect to fail +# from litellm._logging import verbose_proxy_logger +# import logging + +# verbose_proxy_logger.setLevel(logging.DEBUG) +# setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# try: + +# async def test(): +# request = NewUserRequest(models=["mistral"]) +# key = await new_user(request) +# print(key) + +# generated_key = key.key +# bearer_token = "Bearer " + generated_key + +# request = Request(scope={"type": "http"}) +# request._url = URL(url="/chat/completions") + +# async def return_body(): +# return b'{"model": "gemini-pro-vision"}' + +# request.body = return_body + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# pytest.fail(f"This should have failed!. IT's an invalid model") + +# asyncio.run(test()) +# except Exception as e: +# assert ( +# e.message +# == "Authentication Error, API Key not allowed to access model. This token can only access models=['mistral']. Tried to access gemini-pro-vision" +# ) +# pass + + +# def test_call_with_valid_model(custom_db_client): +# # 4. Make a call to a key with a valid model - expect to pass +# setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# try: + +# async def test(): +# request = NewUserRequest(models=["mistral"]) +# key = await new_user(request) +# print(key) + +# generated_key = key.key +# bearer_token = "Bearer " + generated_key + +# request = Request(scope={"type": "http"}) +# request._url = URL(url="/chat/completions") + +# async def return_body(): +# return b'{"model": "mistral"}' + +# request.body = return_body + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# print("result from user auth with new key", result) + +# asyncio.run(test()) +# except Exception as e: +# pytest.fail(f"An exception occurred - {str(e)}") + + +# def test_call_with_user_over_budget(custom_db_client): +# # 5. Make a call with a key over budget, expect to fail +# setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client) +# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# from litellm._logging import verbose_proxy_logger, verbose_logger +# import logging + +# litellm.set_verbose = True +# verbose_logger.setLevel(logging.DEBUG) +# verbose_proxy_logger.setLevel(logging.DEBUG) +# try: + +# async def test(): +# request = NewUserRequest(max_budget=0.00001) +# key = await new_user(request) +# print(key) + +# generated_key = key.key +# user_id = key.user_id +# bearer_token = "Bearer " + generated_key + +# request = Request(scope={"type": "http"}) +# request._url = URL(url="/chat/completions") + +# # use generated key to auth in +# result = await user_api_key_auth(request=request, api_key=bearer_token) +# print("result from user auth with new key", result) + +# # update spend using track_cost callback, make 2nd request, it should fail +# from litellm.proxy.proxy_server import ( +# _PROXY_track_cost_callback as track_cost_callback, +# ) +# from litellm import ModelResponse, Choices, Message, Usage + +# resp = ModelResponse( +# id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", +# choices=[ +# Choices( +# finish_reason=None, +# index=0, +# message=Message( +# content=" Sure! 
Here is a short poem about the sky:\n\nA canvas of blue, a",
+#                             role="assistant",
+#                         ),
+#                     )
+#                 ],
+#                 model="gpt-35-turbo",  # azure always has model written like this
+#                 usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+#             )
+#             await track_cost_callback(
+#                 kwargs={
+#                     "stream": False,
+#                     "litellm_params": {
+#                         "metadata": {
+#                             "user_api_key": hash_token(generated_key),
+#                             "user_api_key_user_id": user_id,
+#                         }
+#                     },
+#                     "response_cost": 0.00002,
+#                 },
+#                 completion_response=resp,
+#             )
+#             await asyncio.sleep(5)
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+#             pytest.fail(f"This should have failed! The key crossed its budget")
+
+#         asyncio.run(test())
+#     except Exception as e:
+#         error_detail = e.message
+#         assert "Authentication Error, ExceededBudget:" in error_detail
+#         print(vars(e))
+
+
+# def test_call_with_user_over_budget_stream(custom_db_client):
+#     # 6. Make a call with a key over budget, expect to fail
+#     setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
+#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+#     from litellm._logging import verbose_proxy_logger
+#     import logging
+
+#     litellm.set_verbose = True
+#     verbose_proxy_logger.setLevel(logging.DEBUG)
+#     try:
+
+#         async def test():
+#             request = NewUserRequest(max_budget=0.00001)
+#             key = await new_user(request)
+#             print(key)
+
+#             generated_key = key.key
+#             user_id = key.user_id
+#             bearer_token = "Bearer " + generated_key
+
+#             request = Request(scope={"type": "http"})
+#             request._url = URL(url="/chat/completions")
+
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+
+#             # update spend using track_cost callback, make 2nd request, it should fail
+#             from litellm.proxy.proxy_server import (
+#                 _PROXY_track_cost_callback as track_cost_callback,
+#             )
+#             from litellm import ModelResponse, Choices, Message, Usage
+
+#             resp = ModelResponse(
+#                 id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+#                 choices=[
+#                     Choices(
+#                         finish_reason=None,
+#                         index=0,
+#                         message=Message(
+#                             content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+#                             role="assistant",
+#                         ),
+#                     )
+#                 ],
+#                 model="gpt-35-turbo",  # azure always has model written like this
+#                 usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+#             )
+#             await track_cost_callback(
+#                 kwargs={
+#                     "stream": True,
+#                     "complete_streaming_response": resp,
+#                     "litellm_params": {
+#                         "metadata": {
+#                             "user_api_key": hash_token(generated_key),
+#                             "user_api_key_user_id": user_id,
+#                         }
+#                     },
+#                     "response_cost": 0.00002,
+#                 },
+#                 completion_response=ModelResponse(),
+#             )
+#             await asyncio.sleep(5)
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+#             pytest.fail(f"This should have failed! The key crossed its budget")
+
+#         asyncio.run(test())
+#     except Exception as e:
+#         error_detail = e.message
+#         assert "Authentication Error, ExceededBudget:" in error_detail
+#         print(vars(e))
+
+
+# def test_call_with_user_key_budget(custom_db_client):
+#     # 7. 
Make a call with a key over budget, expect to fail
+#     setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
+#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+#     from litellm._logging import verbose_proxy_logger
+#     import logging
+
+#     verbose_proxy_logger.setLevel(logging.DEBUG)
+#     try:
+
+#         async def test():
+#             request = GenerateKeyRequest(max_budget=0.00001)
+#             key = await generate_key_fn(request)
+#             print(key)
+
+#             generated_key = key.key
+#             user_id = key.user_id
+#             bearer_token = "Bearer " + generated_key
+
+#             request = Request(scope={"type": "http"})
+#             request._url = URL(url="/chat/completions")
+
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+
+#             # update spend using track_cost callback, make 2nd request, it should fail
+#             from litellm.proxy.proxy_server import (
+#                 _PROXY_track_cost_callback as track_cost_callback,
+#             )
+#             from litellm import ModelResponse, Choices, Message, Usage
+
+#             resp = ModelResponse(
+#                 id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+#                 choices=[
+#                     Choices(
+#                         finish_reason=None,
+#                         index=0,
+#                         message=Message(
+#                             content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+#                             role="assistant",
+#                         ),
+#                     )
+#                 ],
+#                 model="gpt-35-turbo",  # azure always has model written like this
+#                 usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+#             )
+#             await track_cost_callback(
+#                 kwargs={
+#                     "stream": False,
+#                     "litellm_params": {
+#                         "metadata": {
+#                             "user_api_key": hash_token(generated_key),
+#                             "user_api_key_user_id": user_id,
+#                         }
+#                     },
+#                     "response_cost": 0.00002,
+#                 },
+#                 completion_response=resp,
+#             )
+#             await asyncio.sleep(5)
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+#             pytest.fail(f"This should have failed! The key crossed its budget")
+
+#         asyncio.run(test())
+#     except Exception as e:
+#         error_detail = e.message
+#         assert "Authentication Error, ExceededTokenBudget:" in error_detail
+#         print(vars(e))
+
+
+# def test_call_with_key_over_budget_stream(custom_db_client):
+#     # 8. Make a call with a key over budget, expect to fail
+#     setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
+#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+#     from litellm._logging import verbose_proxy_logger
+#     import logging
+
+#     litellm.set_verbose = True
+#     verbose_proxy_logger.setLevel(logging.DEBUG)
+#     try:
+
+#         async def test():
+#             request = GenerateKeyRequest(max_budget=0.00001)
+#             key = await generate_key_fn(request)
+#             print(key)
+
+#             generated_key = key.key
+#             user_id = key.user_id
+#             bearer_token = "Bearer " + generated_key
+
+#             request = Request(scope={"type": "http"})
+#             request._url = URL(url="/chat/completions")
+
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+
+#             # update spend using track_cost callback, make 2nd request, it should fail
+#             from litellm.proxy.proxy_server import (
+#                 _PROXY_track_cost_callback as track_cost_callback,
+#             )
+#             from litellm import ModelResponse, Choices, Message, Usage
+
+#             resp = ModelResponse(
+#                 id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+#                 choices=[
+#                     Choices(
+#                         finish_reason=None,
+#                         index=0,
+#                         message=Message(
+#                             content=" Sure! 
Here is a short poem about the sky:\n\nA canvas of blue, a",
+#                             role="assistant",
+#                         ),
+#                     )
+#                 ],
+#                 model="gpt-35-turbo",  # azure always has model written like this
+#                 usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+#             )
+#             await track_cost_callback(
+#                 kwargs={
+#                     "stream": True,
+#                     "complete_streaming_response": resp,
+#                     "litellm_params": {
+#                         "metadata": {
+#                             "user_api_key": hash_token(generated_key),
+#                             "user_api_key_user_id": user_id,
+#                         }
+#                     },
+#                     "response_cost": 0.00002,
+#                 },
+#                 completion_response=ModelResponse(),
+#             )
+#             await asyncio.sleep(5)
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+#             pytest.fail(f"This should have failed! The key crossed its budget")
+
+#         asyncio.run(test())
+#     except Exception as e:
+#         error_detail = e.message
+#         assert "Authentication Error, ExceededTokenBudget:" in error_detail
+#         print(vars(e))
+
+
+# def test_dynamo_db_migration(custom_db_client):
+#     # Tests the temporary patch we have in place
+#     setattr(litellm.proxy.proxy_server, "custom_db_client", custom_db_client)
+#     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+#     setattr(litellm.proxy.proxy_server, "user_custom_auth", None)
+#     try:
+
+#         async def test():
+#             request = GenerateKeyRequest(max_budget=1)
+#             key = await generate_key_fn(request)
+#             print(key)
+
+#             generated_key = key.key
+#             bearer_token = (
+#                 "Bearer " + generated_key
+#             )  # this works with ishaan's db, it's a never expiring key
+
+#             request = Request(scope={"type": "http"})
+#             request._url = URL(url="/chat/completions")
+
+#             async def return_body():
+#                 return b'{"model": "azure-models"}'
+
+#             request.body = return_body
+
+#             # use generated key to auth in
+#             result = await user_api_key_auth(request=request, api_key=bearer_token)
+#             print("result from user auth with new key", result)
+
+#         asyncio.run(test())
+#     except Exception as e:
+#         pytest.fail(f"An exception occurred - {traceback.format_exc()}")
diff --git a/litellm/tests/test_model_max_token_adjust.py b/litellm/tests/test_model_max_token_adjust.py
index b4d48b5e28..e6b31245f0 100644
--- a/litellm/tests/test_model_max_token_adjust.py
+++ b/litellm/tests/test_model_max_token_adjust.py
@@ -12,6 +12,7 @@ import litellm
 from litellm import completion
 
 
+@pytest.mark.skip(reason="AWS Suspended Account")
 def test_completion_sagemaker():
     litellm.set_verbose = True
     litellm.drop_params = True
diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py
index 6c0edf02bd..5e5d19c786 100644
--- a/litellm/tests/test_provider_specific_config.py
+++ b/litellm/tests/test_provider_specific_config.py
@@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
 
 
 # Sagemaker
+@pytest.mark.skip(reason="AWS Suspended Account")
 def sagemaker_test_completion():
     litellm.SagemakerConfig(max_new_tokens=10)
     # litellm.set_verbose=True
@@ -514,6 +515,7 @@ def sagemaker_test_completion():
 
 
 # Bedrock
+@pytest.mark.skip(reason="AWS Suspended Account")
 def bedrock_test_completion():
     litellm.AmazonCohereConfig(max_tokens=10)
     # litellm.set_verbose=True
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index ab329e14ae..127caf223b 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -166,14 +166,6 @@ def test_call_one_endpoint():
             "tpm": 240000,
             "rpm": 1800,
         },
-        {
-            "model_name": "claude-v1",
-            "litellm_params": {
-                "model": "bedrock/anthropic.claude-instant-v1",
-            
}, - "tpm": 100000, - "rpm": 10000, - }, { "model_name": "text-embedding-ada-002", "litellm_params": { @@ -202,15 +194,6 @@ def test_call_one_endpoint(): ) print("\n response", response) - async def call_bedrock_claude(): - response = await router.acompletion( - model="bedrock/anthropic.claude-instant-v1", - messages=[{"role": "user", "content": "hello this request will pass"}], - specific_deployment=True, - ) - - print("\n response", response) - async def call_azure_embedding(): response = await router.aembedding( model="azure/azure-embedding-model", @@ -221,7 +204,6 @@ def test_call_one_endpoint(): print("\n response", response) asyncio.run(call_azure_completion()) - asyncio.run(call_bedrock_claude()) asyncio.run(call_azure_embedding()) os.environ["AZURE_API_BASE"] = old_api_base @@ -593,6 +575,7 @@ def test_azure_embedding_on_router(): # test_azure_embedding_on_router() +@pytest.mark.skip(reason="AWS Suspended Account") def test_bedrock_on_router(): litellm.set_verbose = True print("\n Testing bedrock on router\n") diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py index b22683c41e..139914f6df 100644 --- a/litellm/tests/test_router_timeout.py +++ b/litellm/tests/test_router_timeout.py @@ -87,6 +87,7 @@ def test_router_timeouts(): print("********** TOKENS USED SO FAR = ", total_tokens_used) +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_router_timeouts_bedrock(): import openai diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 7fdd502048..86f5bcf35d 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key(): # test_completion_replicate_stream_bad_key() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_claude_stream(): try: litellm.set_verbose = False @@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream(): # test_completion_bedrock_claude_stream() +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_bedrock_ai21_stream(): try: litellm.set_verbose = False @@ -911,6 +913,7 @@ def test_sagemaker_weird_response(): # test_sagemaker_weird_response() +@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.asyncio async def test_sagemaker_streaming_async(): try: @@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async(): # asyncio.run(test_sagemaker_streaming_async()) +@pytest.mark.skip(reason="AWS Suspended Account") def test_completion_sagemaker_stream(): try: response = completion(