exception mapping

2023-08-01 11:01:47 -07:00 · 2023-08-01 11:01:47 -07:00 · b6dcddefdf
commit b6dcddefdf
parent be0cd1ccf8
7 changed files with 182 additions and 7 deletions
--- a/litellm/init.py
+++ b/litellm/init.py
@ -25,6 +25,6 @@ open_ai_embedding_models = [
    'text-embedding-ada-002'
 ]
-from .utils import client, logging  # Import all the symbols from main.py
+from .utils import client, logging, exception_type  # Import all the symbols from main.py
 from .main import *  # Import all the symbols from main.py
--- a/litellm/pycache/init.cpython-311.pyc
+++ b/litellm/pycache/init.cpython-311.pyc
--- a/litellm/pycache/main.cpython-311.pyc
+++ b/litellm/pycache/main.cpython-311.pyc
--- a/litellm/pycache/utils.cpython-311.pyc
+++ b/litellm/pycache/utils.cpython-311.pyc
--- a/litellm/main.py
+++ b/litellm/main.py
@ -6,7 +6,7 @@ import traceback
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging
+from litellm import client, logging, exception_type
 from litellm import success_callback, failure_callback
 import random
 ####### ENVIRONMENT VARIABLES ###################
@ -76,6 +76,7 @@ def completion(
      temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
      presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
    )
    print_verbose(f"os environment variables: {os.environ}")
    if azure == True:
      # azure configs
      openai.api_type = "azure"
@ -120,7 +121,7 @@ def completion(
    elif "replicate" in model:
      # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
      # checking in case user set it to REPLICATE_API_KEY instead 
-      if not os.environ.get("REPLICATE_API_TOKEN") and  os.environ.get("REPLICATE_API_KEY"):
+      if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
        replicate_api_token = os.environ.get("REPLICATE_API_KEY")
        os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
      prompt = " ".join([message["content"] for message in messages])
@ -207,7 +208,7 @@ def completion(
                  "finish_reason": "stop",
                  "index": 0,
                  "message": {
-                      "content": response[0],
+                      "content": response[0].text,
                      "role": "assistant"
                  }
              }
@ -246,8 +247,10 @@ def completion(
      raise ValueError(f"No valid completion model args passed in - {args}")
    return response
  except Exception as e:
-    logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
+    # log the original exception
-    raise e
+    logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
    ## Map to OpenAI Exception
    raise exception_type(model=model, original_exception=e)
 ### EMBEDDING ENDPOINTS ####################
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@ -0,0 +1,129 @@
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
 import os 
 import sys
 import traceback
 sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion
 from concurrent.futures import ThreadPoolExecutor
 #### What this tests ####
 #    This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
 # 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
 # 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
 models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
 # Test 1: Rate Limit Errors 
 def test_model(model):
    """Send one very large completion request to `model` and report how it failed.

    Returns True when `completion` raises RateLimitError or any other OpenAIError,
    and False when it raises anything else or does not raise at all.
    """
    try:
        # built inside the try so a failure here is reported like any other uncaught error
        prompt = "how does a court case get to the Supreme Court?" * 50000
        request = [{"content": prompt, "role": "user"}]
        # "chatgpt-test" is the only model called with azure=True
        use_azure = model == "chatgpt-test"
        print(f"model: {model}")
        completion(model=model, messages=request, azure=use_azure)
    except (RateLimitError, OpenAIError):
        # rate limit, or at least an openai error -> in case of random model errors - e.g. overloaded server
        return True
    except Exception as err:
        print(f"Uncaught Exception {model}: {type(err).__name__} - {err}")
    return False
 # Repeat each model 250 times
 extended_models = [name for name in models for _ in range(250)]
 def worker(model):
    # thin wrapper handed to the executor; test_model is looked up when the call runs
    return test_model(model)
 # Tally of outcomes, keyed by the boolean each run returned
 counts = {True: 0, False: 0}
 # Fan the runs out over a thread pool
 with ThreadPoolExecutor(max_workers=500) as executor:
    results = executor.map(worker, extended_models)
    # Count how many runs returned True vs False
    for result in results:
        counts[result] = counts[result] + 1
 # Share of runs whose failure surfaced as an OpenAI error type
 accuracy_score = counts[True] / sum(counts.values())
 print(f"accuracy_score: {accuracy_score}")
 # Test 2: Context Window Errors 
 print("Testing Context Window Errors")
 def test_model(model): # pass extremely long input
    """Send an extremely long prompt to `model` and report how it failed.

    Returns True when `completion` raises InvalidRequestError or any other OpenAIError,
    and False when it raises anything else or does not raise at all.
    """
    oversized_prompt = "how does a court case get to the Supreme Court?" * 100000
    request = [{"content": oversized_prompt, "role": "user"}]
    try:
        # "chatgpt-test" is the only model called with azure=True
        use_azure = model == "chatgpt-test"
        print(f"model: {model}")
        completion(model=model, messages=request, azure=use_azure)
    except (InvalidRequestError, OpenAIError):
        # invalid request, or at least an openai error -> in case of random model errors - e.g. overloaded server
        return True
    except Exception as err:
        print(f"Error Type: {type(err).__name__}")
        print(f"Uncaught Exception - {err}")
    return False
 ## TEST SCORE
 # Run every model once and count the ones whose failure was reported as True
 true_val = 0
 for model in models:
    true_val += int(test_model(model=model) == True)
 accuracy_score = true_val / len(models)
 print(f"CTX WINDOW accuracy_score: {accuracy_score}")
 # Test 3: InvalidAuth Errors
 def logger_fn(model_call_object: dict):
    """Print the call-details dict that is handed to this logging callback."""
    details_line = f"model call details: {model_call_object}"
    print(details_line)
 def test_model(model): # set the model key to an invalid key, depending on the model 
    """Call `model` with a deliberately invalid API key and report how it failed.

    The environment variables holding the key for `model` are overwritten with
    "bad-key" for the duration of the call and put back afterwards, so the bad
    credentials do not leak into whatever runs next in the same process.

    Returns True when `completion` raises AuthenticationError or any other
    OpenAIError, and False when it raises anything else or does not raise at all.
    """
    messages = [{ "content": "Hello, how are you?","role": "user"}]
    # Environment variables to invalidate for each model under test
    key_env_vars = {
        "gpt-3.5-turbo": ["OPENAI_API_KEY"],
        "chatgpt-test": ["AZURE_API_KEY"],
        "claude-instant-1": ["ANTHROPIC_API_KEY"],
        "command-nightly": ["COHERE_API_KEY"],
        "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": ["REPLICATE_API_KEY", "REPLICATE_API_TOKEN"],
    }
    overridden = key_env_vars.get(model, [])
    # Remember the current values (None = not set) so they can be restored
    saved_env = {name: os.environ.get(name) for name in overridden}
    try: 
        # "chatgpt-test" is the only model called with azure=True
        azure = model == "chatgpt-test"
        for name in overridden:
            os.environ[name] = "bad-key"
        print(f"model: {model}")
        response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
        print(f"response: {response}")
    except AuthenticationError as e:
        return True
    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
        return True
    except Exception as e:
        print(f"Uncaught Exception - {e}")
    finally:
        # Undo the override whether the call raised or not
        for name, previous in saved_env.items():
            if previous is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous
    return False
 ## TEST SCORE
 # Run every model once and count the ones whose failure was reported as True
 true_val = 0
 for model in models:
    true_val += int(test_model(model=model) == True)
 accuracy_score = true_val / len(models)
 print(f"INVALID AUTH accuracy_score: {accuracy_score}")
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -9,6 +9,7 @@ import litellm
 import os 
 import openai 
 import random
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 sentry_sdk_instance = None
@ -29,12 +30,15 @@ def print_verbose(print_statement):
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None):
+def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
  try:
    model_call_details = {}
    model_call_details["model"] = model
    model_call_details["input"] = input
    model_call_details["azure"] = azure
    # log exception details
    if exception:
      model_call_details["original_exception"] = exception
    # log additional call details -> api key, etc. 
    if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
      model_call_details["api_type"] = openai.api_type
@ -222,3 +226,42 @@ def handle_success(*args, **kwargs):
    success_handler(args, kwargs)
  pass
 def exception_type(model, original_exception):
    """Raise the OpenAI error type that corresponds to a provider exception.

    This function never returns normally: it raises either a mapped
    openai.error exception or `original_exception` itself.

    Args:
        model: name of the model that was called; used to pick the provider
            ("claude" -> Anthropic, "replicate" -> Replicate,
            "command-nightly" -> Cohere).
        original_exception: the exception raised by the provider call.

    Raises:
        AuthenticationError, InvalidRequestError, RateLimitError,
        ServiceUnavailableError: when the provider error matches one of the
            known patterns below. The provider exception is attached as the cause.
        The original exception: when it is already an OpenAIError, when `model`
            is empty, or when no mapping applies.
    """
    if isinstance(original_exception, OpenAIError):
        # Already an OpenAI error -> nothing to map
        raise original_exception
    if not model:
        raise original_exception
    error_str = str(original_exception)
    if isinstance(original_exception, BaseException):
        exception_name = type(original_exception).__name__
        cause = original_exception
    else:
        exception_name = ""
        cause = None  # `raise ... from` only accepts exceptions or None
    # Not every provider exception has `.message`; fall back to str() so a
    # missing attribute cannot hide the real error behind an AttributeError
    error_message = getattr(original_exception, "message", error_str)
    if "claude" in model: #one of the anthropics
        # Errors without an HTTP response (e.g. connection failures) have no
        # status code; they fall through to the base case below
        status_code = getattr(original_exception, "status_code", None)
        print_verbose(f"status_code: {status_code}")
        if status_code == 401:
            raise AuthenticationError(f"AnthropicException - {error_message}") from cause
        elif status_code == 400:
            raise InvalidRequestError(f"AnthropicException - {error_message}", f"{model}") from cause
        elif status_code == 429:
            raise RateLimitError(f"AnthropicException - {error_message}") from cause
    elif "replicate" in model:
        if "Incorrect authentication token" in error_str:
            raise AuthenticationError(f"ReplicateException - {error_str}") from cause
        elif exception_name == "ModelError":
            raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}") from cause
        elif "Request was throttled" in error_str:
            raise RateLimitError(f"ReplicateException - {error_str}") from cause
        elif exception_name == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
            raise ServiceUnavailableError(f"ReplicateException - {error_str}") from cause
    elif model == "command-nightly": #Cohere
        if "invalid api token" in error_str or "No API key provided." in error_str:
            raise AuthenticationError(f"CohereException - {error_str}") from cause
        elif "too many tokens" in error_str:
            raise InvalidRequestError(f"CohereException - {error_str}", f"{model}") from cause
        elif "CohereConnectionError" in exception_name: # cohere seems to fire these errors when we load test it (1k+ messages / min)
            raise RateLimitError(f"CohereException - {error_message}") from cause
    raise original_exception # base case - return the original exception