Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 02:34:29 +00:00)

Merge pull request #30 from BerriAI/exception-mapping: Exception mapping

Commit 0276325f60
17 changed files with 198 additions and 15 deletions
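In caller-facing terms, this PR logs failures inside completion() and re-raises them as openai.error exception types. A minimal usage sketch of the intended behavior (the model name and the handling shown here are illustrative, not part of the diff):

from openai.error import AuthenticationError, RateLimitError, OpenAIError
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]
try:
    response = completion(model="claude-instant-1", messages=messages)
except AuthenticationError:
    print("bad or rotated API key")          # mapped from the provider's auth error
except RateLimitError:
    print("rate limited, back off and retry")  # mapped from the provider's throttling error
except OpenAIError as e:
    print(f"other provider error: {e}")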
@@ -25,6 +25,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 
-from .utils import client, logging  # Import all the symbols from main.py
+from .utils import client, logging, exception_type  # Import all the symbols from main.py
 from .main import *  # Import all the symbols from main.py
 
Binary files not shown (3 files).
@@ -6,7 +6,7 @@ import traceback
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging
+from litellm import client, logging, exception_type
 from litellm import success_callback, failure_callback
 import random
 ####### ENVIRONMENT VARIABLES ###################
@@ -120,7 +120,7 @@ def completion(
     elif "replicate" in model:
         # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
         # checking in case user set it to REPLICATE_API_KEY instead
         if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
             replicate_api_token = os.environ.get("REPLICATE_API_KEY")
             os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
         prompt = " ".join([message["content"] for message in messages])
@ -207,7 +207,7 @@ def completion(
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"message": {
|
"message": {
|
||||||
"content": response[0],
|
"content": response[0].text,
|
||||||
"role": "assistant"
|
"role": "assistant"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -246,8 +246,10 @@ def completion(
             raise ValueError(f"No valid completion model args passed in - {args}")
         return response
     except Exception as e:
-        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-        raise e
+        # log the original exception
+        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
+        ## Map to OpenAI Exception
+        raise exception_type(model=model, original_exception=e)
 
 
 ### EMBEDDING ENDPOINTS ####################
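Because failures now surface as openai.error types regardless of provider, callers can write provider-agnostic handling. A hedged sketch (not part of this diff) of a retry wrapper built on the mapped RateLimitError; the function name and backoff policy are illustrative:

import time
from openai.error import RateLimitError
from litellm import completion

def completion_with_retry(model, messages, max_retries=3):
    for attempt in range(max_retries):
        try:
            return completion(model=model, messages=messages)
        except RateLimitError:
            time.sleep(2 ** attempt)  # simple exponential backoff between attempts
    raise RuntimeError("still rate limited after retries")  # illustrative fallback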
Binary files not shown (6 files).
@@ -26,7 +26,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
-model_val = "krrish is a model"
+model_val = None
 
 
 def test_completion_with_empty_model():

@@ -35,4 +35,4 @@ def test_completion_with_empty_model():
         response = completion(model=model_val, messages=messages)
     except Exception as e:
         print(f"error occurred: {e}")
         pass
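A hypothetical, assertion-style variant of the same check (not in this diff) would let pytest fail the test instead of printing and passing:

import pytest
from litellm import completion

def test_completion_with_empty_model_raises():
    # completion() is expected to reject model=None rather than call a provider
    with pytest.raises(Exception):
        completion(model=None, messages=[{"content": "Hello, how are you?", "role": "user"}])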
@@ -1,5 +1,8 @@
 import sys, os
 import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
 import pytest
 import litellm
litellm/tests/test_exceptions.py (new file, 129 lines)
@@ -0,0 +1,129 @@
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
+import os
+import sys
+import traceback
+sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+from concurrent.futures import ThreadPoolExecutor
+#### What this tests ####
+# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
+
+
+# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
+
+# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
+
+# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
+
+models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
+
+# Test 1: Rate Limit Errors
+def test_model(model):
+    try:
+        sample_text = "how does a court case get to the Supreme Court?" * 50000
+        messages = [{ "content": sample_text,"role": "user"}]
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except RateLimitError:
+        return True
+    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
+        pass
+    return False
+
+# Repeat each model 500 times
+extended_models = [model for model in models for _ in range(250)]
+
+def worker(model):
+    return test_model(model)
+
+# Create a dictionary to store the results
+counts = {True: 0, False: 0}
+
+# Use Thread Pool Executor
+with ThreadPoolExecutor(max_workers=500) as executor:
+    # Use map to start the operation in thread pool
+    results = executor.map(worker, extended_models)
+
+    # Iterate over results and count True/False
+    for result in results:
+        counts[result] += 1
+
+accuracy_score = counts[True]/(counts[True] + counts[False])
+print(f"accuracy_score: {accuracy_score}")
+
+# Test 2: Context Window Errors
+print("Testing Context Window Errors")
+def test_model(model):  # pass extremely long input
+    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    messages = [{ "content": sample_text,"role": "user"}]
+    try:
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except InvalidRequestError:
+        return True
+    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Error Type: {type(e).__name__}")
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"CTX WINDOW accuracy_score: {accuracy_score}")
+
+# Test 3: InvalidAuth Errors
+def logger_fn(model_call_object: dict):
+    print(f"model call details: {model_call_object}")
+
+
+def test_model(model):  # set the model key to an invalid key, depending on the model
+    messages = [{ "content": "Hello, how are you?","role": "user"}]
+    try:
+        azure = False
+        if model == "gpt-3.5-turbo":
+            os.environ["OPENAI_API_KEY"] = "bad-key"
+        elif model == "chatgpt-test":
+            os.environ["AZURE_API_KEY"] = "bad-key"
+            azure = True
+        elif model == "claude-instant-1":
+            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+        elif model == "command-nightly":
+            os.environ["COHERE_API_KEY"] = "bad-key"
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            os.environ["REPLICATE_API_KEY"] = "bad-key"
+            os.environ["REPLICATE_API_TOKEN"] = "bad-key"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
+        print(f"response: {response}")
+    except AuthenticationError as e:
+        return True
+    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"INVALID AUTH accuracy_score: {accuracy_score}")
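A hypothetical pytest-parametrized version of the invalid-auth check (not part of this diff) that asserts the mapped type directly rather than accumulating an accuracy score:

import pytest
from openai.error import AuthenticationError, OpenAIError
from litellm import completion

@pytest.mark.parametrize("model,env_key", [
    ("gpt-3.5-turbo", "OPENAI_API_KEY"),
    ("claude-instant-1", "ANTHROPIC_API_KEY"),
    ("command-nightly", "COHERE_API_KEY"),
])
def test_invalid_auth_is_mapped(model, env_key, monkeypatch):
    # point the provider at a bad key, then expect the mapped openai.error type
    monkeypatch.setenv(env_key, "bad-key")
    with pytest.raises((AuthenticationError, OpenAIError)):
        completion(model=model, messages=[{"content": "Hello", "role": "user"}])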
@@ -7,7 +7,9 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+litellm.set_verbose = False
 
+score = 0
+
 def logger_fn(model_call_object: dict):
     print(f"model call details: {model_call_object}")

@@ -18,6 +20,7 @@ messages = [{ "content": user_message,"role": "user"}]
 # test on openai completion call
 try:
     response = completion(model="gpt-3.5-turbo", messages=messages)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass

@@ -25,6 +28,7 @@ except:
 # test on non-openai completion call
 try:
     response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass

@@ -32,20 +36,23 @@ except:
 # test on openai embedding call
 try:
     response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
 
 # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
 try:
     response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
 except:
+    score +=1 # expect this to fail
     traceback.print_exc()
 
 # test on good azure openai embedding call
 try:
     response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
+
+
+print(f"Score: {score}, Overall score: {score/5}")
@@ -12,7 +12,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 litellm.set_verbose = True
 
-model_fallback_list = ["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "chatgpt-test"]
+model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]

@@ -21,6 +21,5 @@ for model in model_fallback_list:
     try:
         response = embedding(model="text-embedding-ada-002", input=[user_message])
         response = completion(model=model, messages=messages)
-        print(response)
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
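A hedged sketch of the fallback pattern this script exercises; the break-on-success step is illustrative and not in the diff:

from litellm import completion

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
messages = [{"content": "Hello, how are you?", "role": "user"}]

response = None
for model in model_fallback_list:
    try:
        response = completion(model=model, messages=messages)
        break  # stop at the first provider that succeeds
    except Exception:
        continue  # fall through to the next model in the list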
@@ -9,6 +9,7 @@ import litellm
 import os
 import openai
 import random
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 sentry_sdk_instance = None

@@ -29,12 +30,15 @@ def print_verbose(print_statement):
 
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None):
+def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
     try:
         model_call_details = {}
         model_call_details["model"] = model
         model_call_details["input"] = input
         model_call_details["azure"] = azure
+        # log exception details
+        if exception:
+            model_call_details["original_exception"] = exception
         # log additional call details -> api key, etc.
         if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
             model_call_details["api_type"] = openai.api_type
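With the new exception argument, a user-supplied logger_fn can inspect failures; a minimal sketch using the keys set above ("model", "input", "azure", and, on failure, "original_exception"):

def logger_fn(model_call_object: dict):
    # "original_exception" is only present when logging() is called from an error path
    exc = model_call_object.get("original_exception")
    if exc is not None:
        print(f"call to {model_call_object['model']} failed: {type(exc).__name__}: {exc}")
    else:
        print(f"model call details: {model_call_object}")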
@@ -222,3 +226,42 @@ def handle_success(*args, **kwargs):
         success_handler(args, kwargs)
     pass
 
+
+
+def exception_type(model, original_exception):
+    if isinstance(original_exception, OpenAIError):
+        # Handle the OpenAIError
+        raise original_exception
+    elif model:
+        error_str = str(original_exception)
+        if isinstance(original_exception, BaseException):
+            exception_type = type(original_exception).__name__
+        else:
+            exception_type = ""
+        if "claude" in model: #one of the anthropics
+            print_verbose(f"status_code: {original_exception.status_code}")
+            if original_exception.status_code == 401:
+                raise AuthenticationError(f"AnthropicException - {original_exception.message}")
+            elif original_exception.status_code == 400:
+                raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
+            elif original_exception.status_code == 429:
+                raise RateLimitError(f"AnthropicException - {original_exception.message}")
+        elif "replicate" in model:
+            if "Incorrect authentication token" in error_str:
+                raise AuthenticationError(f"ReplicateException - {error_str}")
+            elif exception_type == "ModelError":
+                raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
+            elif "Request was throttled" in error_str:
+                raise RateLimitError(f"ReplicateException - {error_str}")
+            elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
+                raise ServiceUnavailableError(f"ReplicateException - {error_str}")
+        elif model == "command-nightly": #Cohere
+            if "invalid api token" in error_str or "No API key provided." in error_str:
+                raise AuthenticationError(f"CohereException - {error_str}")
+            elif "too many tokens" in error_str:
+                raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
+            elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
+                raise RateLimitError(f"CohereException - {original_exception.message}")
+        raise original_exception # base case - return the original exception
+    else:
+        raise original_exception
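A hedged illustration of the "claude" branch above; FakeProviderError is a stand-in class carrying the .status_code and .message attributes the mapping relies on, and is not part of the diff:

from openai.error import AuthenticationError
from litellm import exception_type

class FakeProviderError(Exception):  # stand-in for an anthropic client error
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message

try:
    exception_type(model="claude-instant-1",
                   original_exception=FakeProviderError(401, "invalid x-api-key"))
except AuthenticationError as e:
    print(f"mapped to AuthenticationError: {e}")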