import asyncio
import os
import subprocess
import sys
import traceback
from typing import Any

from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError

from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import MagicMock, patch

import pytest

import litellm
from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
    ContextWindowExceededError,
    completion,
    embedding,
)

litellm.vertex_project = "pathrise-convert-1606954137718"
litellm.vertex_location = "us-central1"
litellm.num_retries = 0

# litellm.failure_callback = ["sentry"]
#### What this tests ####
# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type


# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate

# 3 main types of exceptions -> Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)

# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered

exception_models = [
    "sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
    "bedrock/anthropic.claude-instant-v1",
]
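# Illustrative sketch (not part of the test suite): the general pattern the tests
# below follow - force a provider-side failure and assert that litellm surfaces it
# as one of its mapped exception types. The mock_response value mirrors the one
# used in the azure content-policy test below; the model name is a placeholder.
def _example_exception_mapping_pattern():
    try:
        completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello"}],
            mock_response="Exception: content_filter_policy",
        )
    except litellm.ContentPolicyViolationError as e:
        # the provider error should arrive as a litellm exception type
        print("mapped exception:", type(e).__name__)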
@pytest.mark.asyncio
async def test_content_policy_exception_azure():
    try:
        # this is only a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = await litellm.acompletion(
            model="azure/chatgpt-v-2",
            messages=[{"role": "user", "content": "where do I buy lethal drugs from"}],
            mock_response="Exception: content_filter_policy",
        )
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        print("exception", e)
        assert e.response is not None
        assert e.litellm_debug_info is not None
        assert isinstance(e.litellm_debug_info, str)
        assert len(e.litellm_debug_info) > 0
        pass
    except Exception as e:
        print()
        pytest.fail(f"An exception occurred - {str(e)}")


@pytest.mark.asyncio
async def test_content_policy_exception_openai():
    try:
        # this is only a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = await litellm.acompletion(
            model="gpt-3.5-turbo",
            stream=True,
            messages=[
                {"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"}
            ],
        )
        async for chunk in response:
            print(chunk)
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        print("exception", e)
        assert e.llm_provider == "openai"
        pass
    except Exception as e:
        print()
        pytest.fail(f"An exception occurred - {str(e)}")
# Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
def test_context_window(model):
    print("Testing context window error")
    sample_text = "Say error 50 times" * 1000000
    messages = [{"content": sample_text, "role": "user"}]
    try:
        litellm.set_verbose = False
        print("Testing model=", model)
        response = completion(model=model, messages=messages)
        print(f"response: {response}")
        print("FAILED!")
        pytest.fail("An exception occurred")
    except ContextWindowExceededError as e:
        print("Worked!")
    except RateLimitError:
        print("RateLimited!")
    except Exception as e:
        print(f"{e}")
        pytest.fail(f"An error occurred - {e}")


models = ["command-nightly"]
@pytest.mark.skip(reason="duplicate test.")
@pytest.mark.parametrize("model", models)
def test_context_window_with_fallbacks(model):
    ctx_window_fallback_dict = {
        "command-nightly": "claude-2.1",
        "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
        "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
    }
    sample_text = "how does a court case get to the Supreme Court?" * 1000
    messages = [{"content": sample_text, "role": "user"}]

    try:
        completion(
            model=model,
            messages=messages,
            context_window_fallback_dict=ctx_window_fallback_dict,
        )
    except litellm.ServiceUnavailableError as e:
        pass
    except litellm.APIConnectionError as e:
        pass


# for model in litellm.models_by_provider["bedrock"]:
#     test_context_window(model=model)
# test_context_window(model="chat-bison")
# test_context_window_with_fallbacks(model="command-nightly")
# Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
def invalid_auth(model):  # set the model key to an invalid key, depending on the model
    messages = [{"content": "Hello, how are you?", "role": "user"}]
    temporary_key = None
    try:
        if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct":
            temporary_key = os.environ["OPENAI_API_KEY"]
            os.environ["OPENAI_API_KEY"] = "bad-key"
        elif "bedrock" in model:
            temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"]
            os.environ["AWS_ACCESS_KEY_ID"] = "bad-key"
            temporary_aws_region_name = os.environ["AWS_REGION_NAME"]
            os.environ["AWS_REGION_NAME"] = "bad-key"
            temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
            os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
        elif model == "azure/chatgpt-v-2":
            temporary_key = os.environ["AZURE_API_KEY"]
            os.environ["AZURE_API_KEY"] = "bad-key"
        elif model == "claude-3-5-haiku-20241022":
            temporary_key = os.environ["ANTHROPIC_API_KEY"]
            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
        elif model == "command-nightly":
            temporary_key = os.environ["COHERE_API_KEY"]
            os.environ["COHERE_API_KEY"] = "bad-key"
        elif "j2" in model:
            temporary_key = os.environ["AI21_API_KEY"]
            os.environ["AI21_API_KEY"] = "bad-key"
        elif "togethercomputer" in model:
            temporary_key = os.environ["TOGETHERAI_API_KEY"]
            os.environ["TOGETHERAI_API_KEY"] = (
                "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
            )
        elif model in litellm.openrouter_models:
            temporary_key = os.environ["OPENROUTER_API_KEY"]
            os.environ["OPENROUTER_API_KEY"] = "bad-key"
        elif model in litellm.aleph_alpha_models:
            temporary_key = os.environ["ALEPH_ALPHA_API_KEY"]
            os.environ["ALEPH_ALPHA_API_KEY"] = "bad-key"
        elif model in litellm.nlp_cloud_models:
            temporary_key = os.environ["NLP_CLOUD_API_KEY"]
            os.environ["NLP_CLOUD_API_KEY"] = "bad-key"
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            temporary_key = os.environ["REPLICATE_API_KEY"]
            os.environ["REPLICATE_API_KEY"] = "bad-key"
        print(f"model: {model}")
        response = completion(model=model, messages=messages)
        print(f"response: {response}")
    except AuthenticationError as e:
        print(f"AuthenticationError Caught Exception - {str(e)}")
    except (
        OpenAIError
    ) as e:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
        print(f"OpenAIError Caught Exception - {e}")
    except Exception as e:
        print(type(e))
        print(type(AuthenticationError))
        print(e.__class__.__name__)
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    if temporary_key != None:  # reset the key
        if model == "gpt-3.5-turbo":
            os.environ["OPENAI_API_KEY"] = temporary_key
        elif model == "chatgpt-test":
            os.environ["AZURE_API_KEY"] = temporary_key
            azure = True
        elif model == "claude-3-5-haiku-20241022":
            os.environ["ANTHROPIC_API_KEY"] = temporary_key
        elif model == "command-nightly":
            os.environ["COHERE_API_KEY"] = temporary_key
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            os.environ["REPLICATE_API_KEY"] = temporary_key
        elif "j2" in model:
            os.environ["AI21_API_KEY"] = temporary_key
        elif "togethercomputer" in model:
            os.environ["TOGETHERAI_API_KEY"] = temporary_key
        elif model in litellm.aleph_alpha_models:
            os.environ["ALEPH_ALPHA_API_KEY"] = temporary_key
        elif model in litellm.nlp_cloud_models:
            os.environ["NLP_CLOUD_API_KEY"] = temporary_key
        elif "bedrock" in model:
            os.environ["AWS_ACCESS_KEY_ID"] = temporary_aws_access_key
            os.environ["AWS_REGION_NAME"] = temporary_aws_region_name
            os.environ["AWS_SECRET_ACCESS_KEY"] = temporary_secret_key
    return


# for model in litellm.models_by_provider["bedrock"]:
#     invalid_auth(model=model)
# invalid_auth(model="command-nightly")
# Test 3: Invalid Request Error
@pytest.mark.parametrize("model", models)
def test_invalid_request_error(model):
    messages = [{"content": "hey, how's it going?", "role": "user"}]

    with pytest.raises(BadRequestError):
        completion(model=model, messages=messages, max_tokens="hello world")
def test_completion_azure_exception():
    try:
        import openai

        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test azure call
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = completion(
            model="azure/chatgpt-v-2",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["AZURE_API_KEY"] = old_azure_key
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["AZURE_API_KEY"] = old_azure_key
        print("good job got the correct error for azure when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure_exception()
def test_azure_embedding_exceptions():
    try:

        response = litellm.embedding(
            model="azure/azure-embedding-model",
            input="hello",
            messages="hello",
        )
        pytest.fail(f"Bad request this should have failed but got {response}")

    except Exception as e:
        print(vars(e))
        # CRUCIAL Test - Ensures our exceptions are readable and not overly complicated. some users have complained exceptions will randomly have another exception raised in our exception mapping
        assert (
            e.message
            == "litellm.APIError: AzureException APIError - Embeddings.create() got an unexpected keyword argument 'messages'"
        )
async def asynctest_completion_azure_exception():
    try:
        import openai

        import litellm

        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test azure call
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = await litellm.acompletion(
            model="azure/chatgpt-v-2",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["AZURE_API_KEY"] = old_azure_key
        print("good job got the correct error for azure when key not set")
        print(e)
    except Exception as e:
        print("Got wrong exception")
        print("exception", e)
        pytest.fail(f"Error occurred: {e}")


# import asyncio
# asyncio.run(
#     asynctest_completion_azure_exception()
# )
def asynctest_completion_openai_exception_bad_model():
    try:
        import asyncio

        import openai

        import litellm

        print("azure exception bad model\n\n")
        litellm.set_verbose = True

        ## Test azure call
        async def test():
            response = await litellm.acompletion(
                model="openai/gpt-6",
                messages=[{"role": "user", "content": "hello"}],
            )

        asyncio.run(test())
    except openai.NotFoundError:
        print("Good job this is a NotFoundError for a model that does not exist!")
        print("Passed")
    except Exception as e:
        print("Raised wrong type of exception", type(e))
        assert isinstance(e, openai.BadRequestError)
        pytest.fail(f"Error occurred: {e}")


# asynctest_completion_openai_exception_bad_model()
def asynctest_completion_azure_exception_bad_model():
    try:
        import asyncio

        import openai

        import litellm

        print("azure exception bad model\n\n")
        litellm.set_verbose = True

        ## Test azure call
        async def test():
            response = await litellm.acompletion(
                model="azure/gpt-12",
                messages=[{"role": "user", "content": "hello"}],
            )

        asyncio.run(test())
    except openai.NotFoundError:
        print("Good job this is a NotFoundError for a model that does not exist!")
        print("Passed")
    except Exception as e:
        print("Raised wrong type of exception", type(e))
        pytest.fail(f"Error occurred: {e}")


# asynctest_completion_azure_exception_bad_model()
def test_completion_openai_exception():
    # test if openai:gpt raises openai.AuthenticationError
    try:
        import openai

        print("openai gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test OpenAI call
        old_azure_key = os.environ["OPENAI_API_KEY"]
        os.environ["OPENAI_API_KEY"] = "good morning"
        response = completion(
            model="gpt-4",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["OPENAI_API_KEY"] = old_azure_key
        print("OpenAI: good job got the correct error for openai when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai_exception()
def test_anthropic_openai_exception():
    # test if anthropic raises litellm.AuthenticationError
    try:
        litellm.set_verbose = True
        ## Test Anthropic call
        old_azure_key = os.environ["ANTHROPIC_API_KEY"]
        os.environ.pop("ANTHROPIC_API_KEY")
        response = completion(
            model="anthropic/claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except litellm.AuthenticationError as e:
        os.environ["ANTHROPIC_API_KEY"] = old_azure_key
        print("Exception vars=", vars(e))
        assert (
            "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
            in e.message
        )
        print(
            "ANTHROPIC_API_KEY: good job got the correct error for ANTHROPIC_API_KEY when key not set"
        )
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_mistral_exception():
    # test if mistral/mistral-tiny raises openai.AuthenticationError
    try:
        import openai

        print("Testing mistral ai exception mapping")
        litellm.set_verbose = True
        ## Test Mistral call
        old_azure_key = os.environ["MISTRAL_API_KEY"]
        os.environ["MISTRAL_API_KEY"] = "good morning"
        response = completion(
            model="mistral/mistral-tiny",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["MISTRAL_API_KEY"] = old_azure_key
        print("good job got the correct error for openai when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_mistral_exception()
def test_completion_bedrock_invalid_role_exception():
    """
    Test if litellm raises a BadRequestError for an invalid role on Bedrock
    """
    try:
        litellm.set_verbose = True
        response = completion(
            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
            messages=[{"role": "very-bad-role", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)

    except Exception as e:
        assert isinstance(
            e, litellm.BadRequestError
        ), "Expected BadRequestError but got {}".format(type(e))
        print("str(e) = {}".format(str(e)))

        # This is important - we were previously returning a poorly formatted error string, which was
        # litellm.BadRequestError: litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}

        # IMPORTANT ASSERTION
        assert (
            (str(e))
            == "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}"
        )
def test_content_policy_exceptionimage_generation_openai():
    try:
        # this is only a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = litellm.image_generation(
            prompt="where do i buy lethal drugs from", model="dall-e-3"
        )
        print(f"response: {response}")
        assert len(response.data) > 0
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_content_policy_exceptionimage_generation_openai()
def test_content_policy_violation_error_streaming():
    """
    Production Test.
    """
    litellm.set_verbose = False
    print("test_async_completion with stream")

    async def test_get_response():
        try:
            response = await litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=[{"role": "user", "content": "say 1"}],
                temperature=0,
                top_p=1,
                stream=True,
                max_tokens=512,
                presence_penalty=0,
                frequency_penalty=0,
            )
            print(f"response: {response}")

            num_finish_reason = 0
            async for chunk in response:
                print(chunk)
                if chunk["choices"][0].get("finish_reason") is not None:
                    num_finish_reason += 1
                    print("finish_reason", chunk["choices"][0].get("finish_reason"))

            assert (
                num_finish_reason == 1
            ), f"expected only one finish reason. Got {num_finish_reason}"
        except Exception as e:
            pytest.fail(f"GOT exception for gpt-3.5 instruct in streaming: {e}")

    asyncio.run(test_get_response())

    async def test_get_error():
        try:
            response = await litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=[
                    {"role": "user", "content": "where do i buy lethal drugs from"}
                ],
                temperature=0,
                top_p=1,
                stream=True,
                max_tokens=512,
                presence_penalty=0,
                frequency_penalty=0,
                mock_response="Exception: content_filter_policy",
            )
            print(f"response: {response}")

            num_finish_reason = 0
            async for chunk in response:
                print(chunk)
                if chunk["choices"][0].get("finish_reason") is not None:
                    num_finish_reason += 1
                    print("finish_reason", chunk["choices"][0].get("finish_reason"))

            pytest.fail("Expected to return 400 error in streaming")
        except Exception as e:
            pass

    asyncio.run(test_get_error())
def test_completion_perplexity_exception_on_openai_client():
    try:
        import openai

        print("perplexity test\n\n")
        litellm.set_verbose = False
        ## Test Perplexity call
        old_azure_key = os.environ["PERPLEXITYAI_API_KEY"]

        # delete perplexityai api key to simulate bad api key
        del os.environ["PERPLEXITYAI_API_KEY"]

        # temporarily delete openai api key
        original_openai_key = os.environ["OPENAI_API_KEY"]
        del os.environ["OPENAI_API_KEY"]

        response = completion(
            model="perplexity/mistral-7b-instruct",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key
        os.environ["OPENAI_API_KEY"] = original_openai_key
        pytest.fail("Request should have failed - bad api key")
    except openai.AuthenticationError as e:
        os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key
        os.environ["OPENAI_API_KEY"] = original_openai_key
        print("exception: ", e)
        assert (
            "The api_key client option must be set either by passing api_key to the client or by setting the PERPLEXITY_API_KEY environment variable"
            in str(e)
        )
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_perplexity_exception_on_openai_client()
def test_completion_perplexity_exception():
    try:
        import openai

        print("perplexity test\n\n")
        litellm.set_verbose = True
        ## Test Perplexity call
        old_azure_key = os.environ["PERPLEXITYAI_API_KEY"]
        os.environ["PERPLEXITYAI_API_KEY"] = "good morning"
        response = completion(
            model="perplexity/mistral-7b-instruct",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key
        pytest.fail("Request should have failed - bad api key")
    except openai.AuthenticationError as e:
        os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key
        print("exception: ", e)
        assert "PerplexityException" in str(e)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_openai_api_key_exception():
    try:
        import openai

        print("gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test OpenAI call
        old_azure_key = os.environ["OPENAI_API_KEY"]
        os.environ["OPENAI_API_KEY"] = "good morning"
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["OPENAI_API_KEY"] = old_azure_key
        pytest.fail("Request should have failed - bad api key")
    except openai.AuthenticationError as e:
        os.environ["OPENAI_API_KEY"] = old_azure_key
        print("exception: ", e)
        assert "OpenAIException" in str(e)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_async_acompletion()
def test_router_completion_vertex_exception():
    try:
        import litellm

        litellm.set_verbose = True
        router = litellm.Router(
            model_list=[
                {
                    "model_name": "vertex-gemini-pro",
                    "litellm_params": {
                        "model": "vertex_ai/gemini-pro",
                        "api_key": "good-morning",
                    },
                },
            ]
        )
        response = router.completion(
            model="vertex-gemini-pro",
            messages=[{"role": "user", "content": "hello"}],
            vertex_project="bad-project",
        )
        pytest.fail("Request should have failed - bad api key")
    except Exception as e:
        print("exception: ", e)
def test_litellm_completion_vertex_exception():
    try:
        import litellm

        litellm.set_verbose = True
        response = completion(
            model="vertex_ai/gemini-pro",
            api_key="good-morning",
            messages=[{"role": "user", "content": "hello"}],
            vertex_project="bad-project",
        )
        pytest.fail("Request should have failed - bad api key")
    except Exception as e:
        print("exception: ", e)
def test_litellm_predibase_exception():
    """
    Test - Assert that the Predibase API Key is not returned on Authentication Errors
    """
    try:
        import litellm

        litellm.set_verbose = True
        response = completion(
            model="predibase/llama-3-8b-instruct",
            messages=[{"role": "user", "content": "What is the meaning of life?"}],
            tenant_id="c4768f95",
            api_key="hf-rawapikey",
        )
        pytest.fail("Request should have failed - bad api key")
    except Exception as e:
        assert "hf-rawapikey" not in str(e)
        print("exception: ", e)
# # test_invalid_request_error(model="command-nightly")
# # Test 3: Rate Limit Errors
# def test_model_call(model):
#     try:
#         sample_text = "how does a court case get to the Supreme Court?"
#         messages = [{ "content": sample_text,"role": "user"}]
#         print(f"model: {model}")
#         response = completion(model=model, messages=messages)
#     except RateLimitError as e:
#         print(f"headers: {e.response.headers}")
#         return True
#     # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
#     #     return True
#     except Exception as e:
#         print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
#         traceback.print_exc()
#         pass
#     return False
# # Repeat each model 500 times
# # extended_models = [model for model in models for _ in range(250)]
# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]

# def worker(model):
#     return test_model_call(model)

# # Create a dictionary to store the results
# counts = {True: 0, False: 0}

# # Use Thread Pool Executor
# with ThreadPoolExecutor(max_workers=500) as executor:
#     # Use map to start the operation in thread pool
#     results = executor.map(worker, extended_models)

#     # Iterate over results and count True/False
#     for result in results:
#         counts[result] += 1

#     accuracy_score = counts[True]/(counts[True] + counts[False])
#     print(f"accuracy_score: {accuracy_score}")
@pytest.mark.parametrize(
    "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx"]
)
def test_exception_mapping(provider):
    """
    For each provider, run through a set of mock exceptions.

    Assert that they are mapped to the correct litellm exception type.
    """
    litellm.set_verbose = True
    error_map = {
        400: litellm.BadRequestError,
        401: litellm.AuthenticationError,
        404: litellm.NotFoundError,
        408: litellm.Timeout,
        429: litellm.RateLimitError,
        500: litellm.InternalServerError,
        503: litellm.ServiceUnavailableError,
    }

    for code, expected_exception in error_map.items():
        mock_response = Exception()
        setattr(mock_response, "text", "This is an error message")
        setattr(mock_response, "llm_provider", provider)
        setattr(mock_response, "status_code", code)

        response: Any = None
        try:
            response = completion(
                model="{}/test-model".format(provider),
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                mock_response=mock_response,
            )
        except expected_exception:
            continue
        except Exception as e:
            traceback.print_exc()
            response = "{}".format(str(e))
            pytest.fail(
                "Did not raise expected exception. Expected={}, Return={},".format(
                    expected_exception, response
                )
            )

    pass
def test_anthropic_tool_calling_exception():
    """
    Related - https://github.com/BerriAI/litellm/issues/4348
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {},
            },
        }
    ]
    try:
        litellm.completion(
            model="claude-3-5-sonnet-20240620",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            tools=tools,
        )
    except litellm.BadRequestError:
        pass
from typing import Optional, Union

from openai import AsyncOpenAI, OpenAI
def _pre_call_utils(
    call_type: str,
    data: dict,
    client: Union[OpenAI, AsyncOpenAI],
    sync_mode: bool,
    streaming: Optional[bool],
):
    if call_type == "embedding":
        data["input"] = "Hello world!"
        mapped_target: Any = client.embeddings.with_raw_response
        if sync_mode:
            original_function = litellm.embedding
        else:
            original_function = litellm.aembedding
    elif call_type == "chat_completion":
        data["messages"] = [{"role": "user", "content": "Hello world"}]
        if streaming is True:
            data["stream"] = True
        mapped_target = client.chat.completions.with_raw_response  # type: ignore
        if sync_mode:
            original_function = litellm.completion
        else:
            original_function = litellm.acompletion
    elif call_type == "completion":
        data["prompt"] = "Hello world"
        if streaming is True:
            data["stream"] = True
        mapped_target = client.completions.with_raw_response  # type: ignore
        if sync_mode:
            original_function = litellm.text_completion
        else:
            original_function = litellm.atext_completion

    return data, original_function, mapped_target
def _pre_call_utils_httpx(
    call_type: str,
    data: dict,
    client: Union[HTTPHandler, AsyncHTTPHandler],
    sync_mode: bool,
    streaming: Optional[bool],
):
    mapped_target: Any = client.client
    if call_type == "embedding":
        data["input"] = "Hello world!"

        if sync_mode:
            original_function = litellm.embedding
        else:
            original_function = litellm.aembedding
    elif call_type == "chat_completion":
        data["messages"] = [{"role": "user", "content": "Hello world"}]
        if streaming is True:
            data["stream"] = True

        if sync_mode:
            original_function = litellm.completion
        else:
            original_function = litellm.acompletion
    elif call_type == "completion":
        data["prompt"] = "Hello world"
        if streaming is True:
            data["stream"] = True
        if sync_mode:
            original_function = litellm.text_completion
        else:
            original_function = litellm.atext_completion

    return data, original_function, mapped_target
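# Illustrative sketch (not part of the test suite): how the _pre_call_utils helper
# above is meant to be used - patch the raw-response target it returns so the
# underlying client raises, then inspect the exception litellm maps it to.
# The empty api_key and the model name are placeholders.
def _example_pre_call_utils_usage():
    import openai

    client = openai.OpenAI(api_key="")
    data, original_function, mapped_target = _pre_call_utils(
        call_type="chat_completion",
        data={"model": "gpt-3.5-turbo"},
        client=client,
        sync_mode=True,
        streaming=False,
    )
    with patch.object(mapped_target, "create", side_effect=Exception("mock provider error")):
        try:
            original_function(**data, client=client)
        except Exception as e:
            print("mapped exception:", type(e).__name__)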
@pytest.mark.parametrize(
    "sync_mode",
    [True, False],
)
@pytest.mark.parametrize(
    "provider, model, call_type, streaming",
    [
        ("openai", "text-embedding-ada-002", "embedding", None),
        ("openai", "gpt-3.5-turbo", "chat_completion", False),
        ("openai", "gpt-3.5-turbo", "chat_completion", True),
        ("openai", "gpt-3.5-turbo-instruct", "completion", True),
        ("azure", "azure/chatgpt-v-2", "chat_completion", True),
        ("azure", "azure/text-embedding-ada-002", "embedding", True),
        ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True),
    ],
)
@pytest.mark.asyncio
async def test_exception_with_headers(sync_mode, provider, model, call_type, streaming):
    """
    User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
    but Azure says to retry in at most 9s

    ```
    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
    ```
    """
    print(f"Received args: {locals()}")
    import openai

    if sync_mode:
        if provider == "openai":
            openai_client = openai.OpenAI(api_key="")
        elif provider == "azure":
            openai_client = openai.AzureOpenAI(
                api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION
            )
    else:
        if provider == "openai":
            openai_client = openai.AsyncOpenAI(api_key="")
        elif provider == "azure":
            openai_client = openai.AsyncAzureOpenAI(
                api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION
            )

    data = {"model": model}
    data, original_function, mapped_target = _pre_call_utils(
        call_type=call_type,
        data=data,
        client=openai_client,
        sync_mode=sync_mode,
        streaming=streaming,
    )

    cooldown_time = 30.0

    def _return_exception(*args, **kwargs):
        import datetime

        from httpx import Headers, Request, Response

        kwargs = {
            "request": Request("POST", "https://www.google.com"),
            "message": "Error code: 429 - Rate Limit Error!",
            "body": {"detail": "Rate Limit Error!"},
            "code": None,
            "param": None,
            "type": None,
            "response": Response(
                status_code=429,
                headers=Headers(
                    {
                        "date": "Sat, 21 Sep 2024 22:56:53 GMT",
                        "server": "uvicorn",
                        "retry-after": "30",
                        "content-length": "30",
                        "content-type": "application/json",
                    }
                ),
                request=Request("POST", "http://0.0.0.0:9000/chat/completions"),
            ),
            "status_code": 429,
            "request_id": None,
        }

        exception = Exception()
        for k, v in kwargs.items():
            setattr(exception, k, v)
        raise exception

    with patch.object(
        mapped_target,
        "create",
        side_effect=_return_exception,
    ):
        new_retry_after_mock_client = MagicMock(return_value=-1)

        litellm.utils._get_retry_after_from_exception_header = (
            new_retry_after_mock_client
        )

        exception_raised = False
        try:
            if sync_mode:
                resp = original_function(**data, client=openai_client)
                if streaming:
                    for chunk in resp:
                        continue
            else:
                resp = await original_function(**data, client=openai_client)

                if streaming:
                    async for chunk in resp:
                        continue

        except litellm.RateLimitError as e:
            exception_raised = True
            assert e.litellm_response_headers is not None
            assert int(e.litellm_response_headers["retry-after"]) == cooldown_time

        if exception_raised is False:
            print(resp)
        assert exception_raised
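# Illustrative sketch (not part of the test suite): reading the provider's
# retry-after header off the mapped RateLimitError, as asserted above. Assumes
# the header may be absent, in which case a fallback value is used.
def _example_read_retry_after(e: litellm.RateLimitError, fallback: float = 60.0) -> float:
    headers = getattr(e, "litellm_response_headers", None) or {}
    try:
        return float(headers.get("retry-after", fallback))
    except (TypeError, ValueError):
        return fallback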
@pytest.mark.parametrize(
    "sync_mode",
    [True, False],
)
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
    "provider, model, call_type",
    [
        ("anthropic", "claude-3-haiku-20240307", "chat_completion"),
    ],
)
@pytest.mark.asyncio
async def test_exception_with_headers_httpx(
    sync_mode, provider, model, call_type, streaming
):
    """
    User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
    but Azure says to retry in at most 9s

    ```
    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
    ```
    """
    print(f"Received args: {locals()}")
    import openai

    if sync_mode:
        client = HTTPHandler()
    else:
        client = AsyncHTTPHandler()

    data = {"model": model}
    data, original_function, mapped_target = _pre_call_utils_httpx(
        call_type=call_type,
        data=data,
        client=client,
        sync_mode=sync_mode,
        streaming=streaming,
    )

    cooldown_time = 30.0

    def _return_exception(*args, **kwargs):
        import datetime

        from httpx import Headers, HTTPStatusError, Request, Response

        # Create the Request object
        request = Request("POST", "http://0.0.0.0:9000/chat/completions")

        # Create the Response object with the necessary headers and status code
        response = Response(
            status_code=429,
            headers=Headers(
                {
                    "date": "Sat, 21 Sep 2024 22:56:53 GMT",
                    "server": "uvicorn",
                    "retry-after": "30",
                    "content-length": "30",
                    "content-type": "application/json",
                }
            ),
            request=request,
        )

        # Create and raise the HTTPStatusError exception
        raise HTTPStatusError(
            message="Error code: 429 - Rate Limit Error!",
            request=request,
            response=response,
        )

    with patch.object(
        mapped_target,
        "send",
        side_effect=_return_exception,
    ):
        new_retry_after_mock_client = MagicMock(return_value=-1)

        litellm.utils._get_retry_after_from_exception_header = (
            new_retry_after_mock_client
        )

        exception_raised = False
        try:
            if sync_mode:
                resp = original_function(**data, client=client)
                if streaming:
                    for chunk in resp:
                        continue
            else:
                resp = await original_function(**data, client=client)

                if streaming:
                    async for chunk in resp:
                        continue

        except litellm.RateLimitError as e:
            exception_raised = True
            assert e.litellm_response_headers is not None
            print("e.litellm_response_headers", e.litellm_response_headers)
            assert int(e.litellm_response_headers["retry-after"]) == cooldown_time

        if exception_raised is False:
            print(resp)
        assert exception_raised
@pytest.mark.asyncio
@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"])
async def test_bad_request_error_contains_httpx_response(model):
    """
    Test that the BadRequestError contains the httpx response

    Relevant issue: https://github.com/BerriAI/litellm/issues/6732
    """
    try:
        await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hello world"}],
            bad_arg="bad_arg",
        )
        pytest.fail("Expected to raise BadRequestError")
    except litellm.BadRequestError as e:
        print("e.response", e.response)
        print("vars(e.response)", vars(e.response))
        assert e.response is not None
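# Illustrative sketch (not part of the test suite): safe access to the httpx
# response attached to a caught litellm exception, assuming some exception types
# may not carry one.
def _example_safe_response_access(e: Exception) -> None:
    # getattr avoids an AttributeError when the exception has no attached response
    response = getattr(e, "response", None)
    if response is not None:
        print("status_code:", response.status_code)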