diff --git a/litellm/__init__.py b/litellm/__init__.py
index 38697c7519..7ed52d7cd3 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -25,6 +25,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 
-from .utils import client, logging # Import all the symbols from main.py
+from .utils import client, logging, exception_type # Import all the symbols from main.py
 from .main import * # Import all the symbols from main.py
 
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 73352d569b..3e9ac33f0f 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 11eb85d841..1490fcfea0 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 9a0883ee25..71c54c1834 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index b5a6b1c8af..ec2de634f9 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -6,7 +6,7 @@ import traceback
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging
+from litellm import client, logging, exception_type
 from litellm import success_callback, failure_callback
 import random
 ####### ENVIRONMENT VARIABLES ###################
@@ -120,7 +120,7 @@ def completion(
         elif "replicate" in model:
             # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
             # checking in case user set it to REPLICATE_API_KEY instead
-            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
+            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
                 replicate_api_token = os.environ.get("REPLICATE_API_KEY")
                 os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
             prompt = " ".join([message["content"] for message in messages])
@@ -207,7 +207,7 @@ def completion(
                         "finish_reason": "stop",
                         "index": 0,
                         "message": {
-                            "content": response[0],
+                            "content": response[0].text,
                             "role": "assistant"
                         }
                     }
@@ -246,8 +246,10 @@ def completion(
             raise ValueError(f"No valid completion model args passed in - {args}")
         return response
     except Exception as e:
-        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-        raise e
+        # log the original exception
+        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
+        ## Map to OpenAI Exception
+        raise exception_type(model=model, original_exception=e)
 
 
 ### EMBEDDING ENDPOINTS ####################
diff --git a/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..1e3c4bc7d3
Binary files /dev/null and b/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..3bc5a08bbf
Binary files /dev/null and b/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..2baa7bc5f0
Binary files /dev/null and b/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..0e69bc88de
Binary files /dev/null and b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..9f71ef3a11
Binary files /dev/null and b/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..864247d098
Binary files /dev/null and b/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py
index dd1e8d5093..53872e5fad 100644
--- a/litellm/tests/test_bad_params.py
+++ b/litellm/tests/test_bad_params.py
@@ -26,7 +26,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
-model_val = "krrish is a model"
+model_val = None
 
 
 def test_completion_with_empty_model():
@@ -35,4 +35,4 @@ def test_completion_with_empty_model():
         response = completion(model=model_val, messages=messages)
     except Exception as e:
         print(f"error occurred: {e}")
-        pass
+        pass
\ No newline at end of file
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index adb55a45e3..b9bbbebe4b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1,5 +1,8 @@
 import sys, os
 import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
 import pytest
 import litellm
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
new file mode 100644
index 0000000000..38be0e2c15
--- /dev/null
+++ b/litellm/tests/test_exceptions.py
@@ -0,0 +1,129 @@
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
+import os
+import sys
+import traceback
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+from concurrent.futures import ThreadPoolExecutor
+#### What this tests ####
+# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
+
+
+# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
+
+# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
+
+# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
+
+models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
+
+# Test 1: Rate Limit Errors
+def test_model(model):
+    try:
+        sample_text = "how does a court case get to the Supreme Court?" * 50000
+        messages = [{ "content": sample_text,"role": "user"}]
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except RateLimitError:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
+        pass
+    return False
+
+# Repeat each model 500 times
+extended_models = [model for model in models for _ in range(250)]
+
+def worker(model):
+    return test_model(model)
+
+# Create a dictionary to store the results
+counts = {True: 0, False: 0}
+
+# Use Thread Pool Executor
+with ThreadPoolExecutor(max_workers=500) as executor:
+    # Use map to start the operation in thread pool
+    results = executor.map(worker, extended_models)
+
+    # Iterate over results and count True/False
+    for result in results:
+        counts[result] += 1
+
+accuracy_score = counts[True]/(counts[True] + counts[False])
+print(f"accuracy_score: {accuracy_score}")
+
+# Test 2: Context Window Errors
+print("Testing Context Window Errors")
+def test_model(model): # pass extremely long input
+    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    messages = [{ "content": sample_text,"role": "user"}]
+    try:
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except InvalidRequestError:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Error Type: {type(e).__name__}")
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"CTX WINDOW accuracy_score: {accuracy_score}")
+
+# Test 3: InvalidAuth Errors
+def logger_fn(model_call_object: dict):
+    print(f"model call details: {model_call_object}")
+
+
+def test_model(model): # set the model key to an invalid key, depending on the model
+    messages = [{ "content": "Hello, how are you?","role": "user"}]
+    try:
+        azure = False
+        if model == "gpt-3.5-turbo":
+            os.environ["OPENAI_API_KEY"] = "bad-key"
+        elif model == "chatgpt-test":
+            os.environ["AZURE_API_KEY"] = "bad-key"
+            azure = True
+        elif model == "claude-instant-1":
+            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+        elif model == "command-nightly":
+            os.environ["COHERE_API_KEY"] = "bad-key"
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            os.environ["REPLICATE_API_KEY"] = "bad-key"
+            os.environ["REPLICATE_API_TOKEN"] = "bad-key"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
+        print(f"response: {response}")
+    except AuthenticationError as e:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"INVALID AUTH accuracy_score: {accuracy_score}")
\ No newline at end of file
diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py
index 21e4a879c6..dbacf8b472 100644
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -7,7 +7,9 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+litellm.set_verbose = False
+
+score = 0
 
 def logger_fn(model_call_object: dict):
     print(f"model call details: {model_call_object}")
@@ -18,6 +20,7 @@ messages = [{ "content": user_message,"role": "user"}]
 # test on openai completion call
 try:
     response = completion(model="gpt-3.5-turbo", messages=messages)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
@@ -25,6 +28,7 @@ except:
 # test on non-openai completion call
 try:
     response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
@@ -32,20 +36,23 @@ except:
 # test on openai embedding call
 try:
     response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
 
 # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
 try:
     response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
 except:
+    score +=1 # expect this to fail
     traceback.print_exc()
 
 # test on good azure openai embedding call
 try:
     response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
+
+
+print(f"Score: {score}, Overall score: {score/5}")
\ No newline at end of file
diff --git a/litellm/tests/test_model_fallback.py b/litellm/tests/test_model_fallback.py
index b389e9f6ac..69dc1f68dd 100644
--- a/litellm/tests/test_model_fallback.py
+++ b/litellm/tests/test_model_fallback.py
@@ -12,7 +12,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 litellm.set_verbose = True
 
-model_fallback_list = ["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "chatgpt-test"]
+model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
@@ -21,6 +21,5 @@ for model in model_fallback_list:
     try:
         response = embedding(model="text-embedding-ada-002", input=[user_message])
         response = completion(model=model, messages=messages)
-        print(response)
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 593b754f03..c7eaa96d2b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -9,6 +9,7 @@ import litellm
 import os
 import openai
 import random
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 sentry_sdk_instance = None
@@ -29,12 +30,15 @@ def print_verbose(print_statement):
 
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None):
+def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
     try:
         model_call_details = {}
         model_call_details["model"] = model
         model_call_details["input"] = input
         model_call_details["azure"] = azure
+        # log exception details
+        if exception:
+            model_call_details["original_exception"] = exception
         # log additional call details -> api key, etc.
         if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
             model_call_details["api_type"] = openai.api_type
@@ -222,3 +226,42 @@ def handle_success(*args, **kwargs):
         success_handler(args, kwargs)
     pass
 
+
+def exception_type(model, original_exception):
+    if isinstance(original_exception, OpenAIError):
+        # Handle the OpenAIError
+        raise original_exception
+    elif model:
+        error_str = str(original_exception)
+        if isinstance(original_exception, BaseException):
+            exception_type = type(original_exception).__name__
+        else:
+            exception_type = ""
+        if "claude" in model: #one of the anthropics
+            print_verbose(f"status_code: {original_exception.status_code}")
+            if original_exception.status_code == 401:
+                raise AuthenticationError(f"AnthropicException - {original_exception.message}")
+            elif original_exception.status_code == 400:
+                raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
+            elif original_exception.status_code == 429:
+                raise RateLimitError(f"AnthropicException - {original_exception.message}")
+        elif "replicate" in model:
+            if "Incorrect authentication token" in error_str:
+                raise AuthenticationError(f"ReplicateException - {error_str}")
+            elif exception_type == "ModelError":
+                raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
+            elif "Request was throttled" in error_str:
+                raise RateLimitError(f"ReplicateException - {error_str}")
+            elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
+                raise ServiceUnavailableError(f"ReplicateException - {error_str}")
+        elif model == "command-nightly": #Cohere
+            if "invalid api token" in error_str or "No API key provided." in error_str:
+                raise AuthenticationError(f"CohereException - {error_str}")
+            elif "too many tokens" in error_str:
+                raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
+            elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
+                raise RateLimitError(f"CohereException - {original_exception.message}")
+        raise original_exception # base case - return the original exception
+    else:
+        raise original_exception
+    
\ No newline at end of file
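Usage sketch (illustrative, not part of the patch): with this change, completion() re-raises recognized Anthropic, Replicate, and Cohere failures as the corresponding openai.error types, so a caller can handle every provider with one set of except clauses. The snippet assumes the pre-1.0 openai package (which provides openai.error), that the relevant provider API keys are set in the environment, and that errors the mapper does not recognize are re-raised unchanged (the base case in exception_type).

    from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
    from litellm import completion

    messages = [{"content": "Hello, how are you?", "role": "user"}]

    try:
        response = completion(model="claude-instant-1", messages=messages)
        print(response)
    except AuthenticationError:
        print("bad or rotated provider key")            # e.g. Anthropic 401 -> AuthenticationError
    except InvalidRequestError:
        print("invalid request, e.g. prompt too long")  # e.g. Anthropic 400 -> InvalidRequestError
    except RateLimitError:
        print("rate limited - back off and retry")      # e.g. Anthropic 429 -> RateLimitError
    except OpenAIError as e:
        print(f"other mapped provider error: {e}")      # e.g. Replicate server error -> ServiceUnavailableError
    except Exception as e:
        print(f"unmapped provider error: {e}")          # unrecognized errors are re-raised as-is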