forked from phoenix/litellm-mirror
fixing exception mapping
parent 9b0e9bf57c
commit 92a13958ce
8 changed files with 188 additions and 115 deletions
@@ -69,6 +69,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type # Import all the symbols from main.py
+from .utils import client, logging, exception_type, get_optional_params # Import all the symbols from main.py
 from .main import * # Import all the symbols from main.py
 from .integrations import *
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -6,7 +6,7 @@ from functools import partial
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging, exception_type, timeout
+from litellm import client, logging, exception_type, timeout, get_optional_params
 import random
 import asyncio
 from tenacity import (
@@ -20,51 +20,6 @@ dotenv.load_dotenv() # Loading env variables using dotenv
 # TODO move this to utils.py
 # TODO add translations
 # TODO see if this worked - model_name == krrish
-def get_optional_params(
-    # 12 optional params
-    functions = [],
-    function_call = "",
-    temperature = 1,
-    top_p = 1,
-    n = 1,
-    stream = False,
-    stop = None,
-    max_tokens = float('inf'),
-    presence_penalty = 0,
-    frequency_penalty = 0,
-    logit_bias = {},
-    user = "",
-    deployment_id = None
-):
-    optional_params = {}
-    if functions != []:
-        optional_params["functions"] = functions
-    if function_call != "":
-        optional_params["function_call"] = function_call
-    if temperature != 1:
-        optional_params["temperature"] = temperature
-    if top_p != 1:
-        optional_params["top_p"] = top_p
-    if n != 1:
-        optional_params["n"] = n
-    if stream:
-        optional_params["stream"] = stream
-    if stop != None:
-        optional_params["stop"] = stop
-    if max_tokens != float('inf'):
-        optional_params["max_tokens"] = max_tokens
-    if presence_penalty != 0:
-        optional_params["presence_penalty"] = presence_penalty
-    if frequency_penalty != 0:
-        optional_params["frequency_penalty"] = frequency_penalty
-    if logit_bias != {}:
-        optional_params["logit_bias"] = logit_bias
-    if user != "":
-        optional_params["user"] = user
-    if deployment_id != None:
-        optional_params["deployment_id"] = user
-    return optional_params
-
 ####### COMPLETION ENDPOINTS ################
 #############################################
 async def acompletion(*args, **kwargs):
@@ -285,12 +240,13 @@ def completion(
             }
             response = new_response
         else:
+            ## LOGGING
             logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
             args = locals()
             raise ValueError(f"No valid completion model args passed in - {args}")
         return response
     except Exception as e:
-        # log the original exception
+        ## LOGGING
         logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
         ## Map to OpenAI Exception
         raise exception_type(model=model, original_exception=e)
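The net effect of the completion() change above is that a provider failure is first logged and then re-raised through exception_type(), so callers see one family of exception classes regardless of which backend failed. A minimal caller-side sketch of what that enables (my example; the exception classes are assumed to come from the pre-1.0 openai package, since their import is not shown in this diff):

from litellm import completion
# Assumption: these are the OpenAI error classes this diff maps to; the exact
# import path is not shown in the commit.
from openai.error import AuthenticationError, RateLimitError, OpenAIError

try:
    response = completion(model="command-nightly",
                          messages=[{"role": "user", "content": "Hi"}])
except AuthenticationError:
    pass  # bad or missing provider key, whichever provider raised it
except RateLimitError:
    pass  # throttled / too-many-tokens cases per the provider-specific mappings
except OpenAIError:
    pass  # any other mapped (or passed-through) OpenAI-style error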
@@ -8,6 +8,7 @@ from litellm import embedding, completion
 from concurrent.futures import ThreadPoolExecutor
 import pytest

+litellm.failure_callback = ["sentry"]
 # litellm.set_verbose = True
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
@@ -22,11 +23,16 @@ import pytest
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
 models = ["command-nightly"]
 def logging_fn(model_call_dict):
-    print(f"model_call_dict: {model_call_dict['model']}")
+    if "model" in model_call_dict:
+        print(f"model_call_dict: {model_call_dict['model']}")
+    else:
+        print(f"model_call_dict: {model_call_dict}")


 # Test 1: Context Window Errors
 @pytest.mark.parametrize("model", models)
 def test_context_window(model):
-    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    sample_text = "how does a court case get to the Supreme Court?" * 5000
     messages = [{"content": sample_text, "role": "user"}]
     try:
         azure = model == "chatgpt-test"
@@ -41,44 +47,61 @@ def test_context_window(model):
         return
     except Exception as e:
         print("Uncaught Error in test_context_window")
-        # print(f"Error Type: {type(e).__name__}")
+        print(f"Error Type: {type(e).__name__}")
         print(f"Uncaught Exception - {e}")
         pytest.fail(f"Error occurred: {e}")
     return
 test_context_window("command-nightly")
-# # Test 2: InvalidAuth Errors
-# def logger_fn(model_call_object: dict):
-#     print(f"model call details: {model_call_object}")
-
-# @pytest.mark.parametrize("model", models)
-# def invalid_auth(model): # set the model key to an invalid key, depending on the model
-#     messages = [{ "content": "Hello, how are you?","role": "user"}]
-#     try:
-#         azure = False
-#         if model == "gpt-3.5-turbo":
-#             os.environ["OPENAI_API_KEY"] = "bad-key"
-#         elif model == "chatgpt-test":
-#             os.environ["AZURE_API_KEY"] = "bad-key"
-#             azure = True
-#         elif model == "claude-instant-1":
-#             os.environ["ANTHROPIC_API_KEY"] = "bad-key"
-#         elif model == "command-nightly":
-#             os.environ["COHERE_API_KEY"] = "bad-key"
-#         elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
-#             os.environ["REPLICATE_API_KEY"] = "bad-key"
-#             os.environ["REPLICATE_API_TOKEN"] = "bad-key"
-#         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure)
-#         print(f"response: {response}")
-#     except AuthenticationError as e:
-#         return
-#     except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
-#         return
-#     except Exception as e:
-#         print(f"Uncaught Exception - {e}")
-#         pytest.fail(f"Error occurred: {e}")
-#     return
-
+# Test 2: InvalidAuth Errors
+@pytest.mark.parametrize("model", models)
+def invalid_auth(model): # set the model key to an invalid key, depending on the model
+    messages = [{ "content": "Hello, how are you?","role": "user"}]
+    temporary_key = None
+    try:
+        azure = False
+        if model == "gpt-3.5-turbo":
+            temporary_key = os.environ["OPENAI_API_KEY"]
+            os.environ["OPENAI_API_KEY"] = "bad-key"
+        elif model == "chatgpt-test":
+            temporary_key = os.environ["AZURE_API_KEY"]
+            os.environ["AZURE_API_KEY"] = "bad-key"
+            azure = True
+        elif model == "claude-instant-1":
+            temporary_key = os.environ["ANTHROPIC_API_KEY"]
+            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+        elif model == "command-nightly":
+            temporary_key = os.environ["COHERE_API_KEY"]
+            os.environ["COHERE_API_KEY"] = "bad-key"
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            temporary_key = os.environ["REPLICATE_API_KEY"]
+            os.environ["REPLICATE_API_KEY"] = "bad-key"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+        print(f"response: {response}")
+    except AuthenticationError as e:
+        print(f"AuthenticationError Caught Exception - {e}")
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        print(f"OpenAIError Caught Exception - {e}")
+    except Exception as e:
+        print(type(e))
+        print(e.__class__.__name__)
+        print(f"Uncaught Exception - {e}")
+        pytest.fail(f"Error occurred: {e}")
+    if temporary_key != None: # reset the key
+        if model == "gpt-3.5-turbo":
+            os.environ["OPENAI_API_KEY"] = temporary_key
+        elif model == "chatgpt-test":
+            os.environ["AZURE_API_KEY"] = temporary_key
+            azure = True
+        elif model == "claude-instant-1":
+            os.environ["ANTHROPIC_API_KEY"] = temporary_key
+        elif model == "command-nightly":
+            os.environ["COHERE_API_KEY"] = temporary_key
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            os.environ["REPLICATE_API_KEY"] = temporary_key
+    return
+invalid_auth("command-nightly")
+
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
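Aside: the save/override/restore dance around the environment variables above can also be expressed with pytest's monkeypatch fixture, which restores the original value automatically even when the test fails. A sketch of the same idea (not part of this commit; the error import path is assumed, as elsewhere):

import pytest
from litellm import completion
from openai.error import AuthenticationError, OpenAIError  # assumed import path

KEY_VARS = {"gpt-3.5-turbo": "OPENAI_API_KEY", "command-nightly": "COHERE_API_KEY"}

@pytest.mark.parametrize("model", ["command-nightly"])
def test_invalid_auth_monkeypatch(model, monkeypatch):
    monkeypatch.setenv(KEY_VARS[model], "bad-key")  # reverted automatically after the test
    with pytest.raises((AuthenticationError, OpenAIError)):
        completion(model=model, messages=[{"role": "user", "content": "Hello, how are you?"}])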
litellm/utils.py (134 lines changed)
@@ -25,41 +25,44 @@ def print_verbose(print_statement):

 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
+def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None):
     try:
         model_call_details = {}
-        model_call_details["model"] = model
-        model_call_details["azure"] = azure
-        # log exception details
+        if model:
+            model_call_details["model"] = model
+        if azure:
+            model_call_details["azure"] = azure
         if exception:
             model_call_details["original_exception"] = exception

         if litellm.telemetry:
             safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.

-        model_call_details["input"] = input
+        if input:
+            model_call_details["input"] = input
         # log additional call details -> api key, etc.
-        if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
-            model_call_details["api_type"] = openai.api_type
-            model_call_details["api_base"] = openai.api_base
-            model_call_details["api_version"] = openai.api_version
-            model_call_details["api_key"] = openai.api_key
-        elif "replicate" in model:
-            model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
-        elif model in litellm.anthropic_models:
-            model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
-        elif model in litellm.cohere_models:
-            model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-        model_call_details["additional_args"] = additional_args
+        if model:
+            if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
+                model_call_details["api_type"] = openai.api_type
+                model_call_details["api_base"] = openai.api_base
+                model_call_details["api_version"] = openai.api_version
+                model_call_details["api_key"] = openai.api_key
+            elif "replicate" in model:
+                model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
+            elif model in litellm.anthropic_models:
+                model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
+            elif model in litellm.cohere_models:
+                model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
+        model_call_details["additional_args"] = additional_args
         ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
-        print_verbose(f"Basic model call details: {model_call_details}")
+        print_verbose(f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}")
         if logger_fn and callable(logger_fn):
             try:
                 logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
-            except:
-                print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
-    except:
-        traceback.print_exc()
+            except Exception as e:
+                print(f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
+    except Exception as e:
+        print(f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
         pass

 ####### CLIENT ###################
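Since logging() now tolerates a missing model or input and simply forwards a plain dict, a user-supplied logger only has to accept that dict. A small sketch of the expected shape, mirroring the logging_fn used in the tests above (which keys are present depends on the call site):

def my_logger(model_call_dict):
    # keys seen in this diff include "model", "input", "azure", "api_key",
    # "additional_args", and "original_exception" (the last only on failures)
    model = model_call_dict.get("model", "<unknown>")
    if "original_exception" in model_call_dict:
        print(f"call to {model} failed: {model_call_dict['original_exception']}")
    else:
        print(f"calling {model} with: {model_call_dict.get('input')}")

# passed through completion(...); the client wrapper below also stores it as user_logger_fn
# response = completion(model="command-nightly", messages=messages, logger_fn=my_logger)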
@@ -67,7 +70,7 @@ def logging(model, input, azure=False, additional_args={}, logger_fn=None, excep
 def client(original_function):
     def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
         try:
-            global callback_list, add_breadcrumb
+            global callback_list, add_breadcrumb, user_logger_fn
             if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
                 callback_list = list(set(litellm.success_callback + litellm.failure_callback))
                 set_callbacks(callback_list=callback_list,)
@@ -77,13 +80,15 @@ def client(original_function):
                     message=f"Positional Args: {args}, Keyword Args: {kwargs}",
                     level="info",
                 )
+            if "logger_fn" in kwargs:
+                user_logger_fn = kwargs["logger_fn"]
         except: # DO NOT BLOCK running the function because of this
             print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
             pass

     def wrapper(*args, **kwargs):
         try:
-            function_setup(args, kwargs)
+            function_setup(*args, **kwargs)
             ## MODEL CALL
             start_time = datetime.datetime.now()
             result = original_function(*args, **kwargs)
@@ -100,6 +105,51 @@ def client(original_function):
     return wrapper

 ####### HELPER FUNCTIONS ################
+def get_optional_params(
+    # 12 optional params
+    functions = [],
+    function_call = "",
+    temperature = 1,
+    top_p = 1,
+    n = 1,
+    stream = False,
+    stop = None,
+    max_tokens = float('inf'),
+    presence_penalty = 0,
+    frequency_penalty = 0,
+    logit_bias = {},
+    user = "",
+    deployment_id = None
+):
+    optional_params = {}
+    if functions != []:
+        optional_params["functions"] = functions
+    if function_call != "":
+        optional_params["function_call"] = function_call
+    if temperature != 1:
+        optional_params["temperature"] = temperature
+    if top_p != 1:
+        optional_params["top_p"] = top_p
+    if n != 1:
+        optional_params["n"] = n
+    if stream:
+        optional_params["stream"] = stream
+    if stop != None:
+        optional_params["stop"] = stop
+    if max_tokens != float('inf'):
+        optional_params["max_tokens"] = max_tokens
+    if presence_penalty != 0:
+        optional_params["presence_penalty"] = presence_penalty
+    if frequency_penalty != 0:
+        optional_params["frequency_penalty"] = frequency_penalty
+    if logit_bias != {}:
+        optional_params["logit_bias"] = logit_bias
+    if user != "":
+        optional_params["user"] = user
+    if deployment_id != None:
+        optional_params["deployment_id"] = user
+    return optional_params
+
 def set_callbacks(callback_list):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
     try:
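For reference, get_optional_params (moved here from main.py and re-exported via litellm/__init__.py in this commit) only returns arguments that differ from their OpenAI defaults, so provider calls can splat the result without sending a full parameter set. A quick sketch of the intended behaviour with my own example values (note that, as committed, the deployment_id branch assigns the user value, which looks like a typo in the source):

from litellm import get_optional_params

optional_params = get_optional_params(temperature=0.2, max_tokens=256, stream=False)
# -> {"temperature": 0.2, "max_tokens": 256}; defaults (top_p=1, n=1, stream=False, ...) are dropped,
# and the dict would then be forwarded to the provider call, e.g. **optional_params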
@@ -150,8 +200,8 @@ def set_callbacks(callback_list):
 def handle_failure(exception, traceback_exception, args, kwargs):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
     try:
-        print_verbose(f"handle_failure args: {args}")
-        print_verbose(f"handle_failure kwargs: {kwargs}")
+        # print_verbose(f"handle_failure args: {args}")
+        # print_verbose(f"handle_failure kwargs: {kwargs}")

         success_handler = additional_details.pop("success_handler", None)
         failure_handler = additional_details.pop("failure_handler", None)
@@ -159,7 +209,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
         additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
         print_verbose(f"self.failure_callback: {litellm.failure_callback}")

-        print_verbose(f"additional_details: {additional_details}")
+        # print_verbose(f"additional_details: {additional_details}")
         for callback in litellm.failure_callback:
             try:
                 if callback == "slack":
@@ -206,7 +257,9 @@ def handle_failure(exception, traceback_exception, args, kwargs):
             }
             failure_handler(call_details)
             pass
-    except:
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, exception=e)
         pass

 def handle_success(args, kwargs, result, start_time, end_time):
@@ -245,12 +298,16 @@ def handle_success(args, kwargs, result, start_time, end_time):
         if success_handler and callable(success_handler):
             success_handler(args, kwargs)
         pass
-    except:
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, exception=e)
         print_verbose(f"Success Callback Error - {traceback.format_exc()}")
         pass


 def exception_type(model, original_exception):
+    global user_logger_fn
+    exception_mapping_worked = False
     try:
         if isinstance(original_exception, OpenAIError):
             # Handle the OpenAIError
@@ -265,32 +322,46 @@ def exception_type(model, original_exception):
             if "status_code" in original_exception:
                 print_verbose(f"status_code: {original_exception.status_code}")
                 if original_exception.status_code == 401:
+                    exception_mapping_worked = True
                     raise AuthenticationError(f"AnthropicException - {original_exception.message}")
                 elif original_exception.status_code == 400:
+                    exception_mapping_worked = True
                     raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
                 elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
                     raise RateLimitError(f"AnthropicException - {original_exception.message}")
         elif "replicate" in model:
             if "Incorrect authentication token" in error_str:
+                exception_mapping_worked = True
                 raise AuthenticationError(f"ReplicateException - {error_str}")
             elif exception_type == "ModelError":
+                exception_mapping_worked = True
                 raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
             elif "Request was throttled" in error_str:
+                exception_mapping_worked = True
                 raise RateLimitError(f"ReplicateException - {error_str}")
             elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
                 raise ServiceUnavailableError(f"ReplicateException - {error_str}")
         elif model == "command-nightly": #Cohere
             if "invalid api token" in error_str or "No API key provided." in error_str:
+                exception_mapping_worked = True
                 raise AuthenticationError(f"CohereException - {error_str}")
             elif "too many tokens" in error_str:
+                exception_mapping_worked = True
                 raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
             elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
+                exception_mapping_worked = True
                 raise RateLimitError(f"CohereException - {original_exception.message}")
             raise original_exception # base case - return the original exception
         else:
             raise original_exception
-    except:
-        raise original_exception
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, additional_args={"original_exception": original_exception}, exception=e)
+        if exception_mapping_worked:
+            raise e
+        else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
+            raise original_exception

 def safe_crash_reporting(model=None, exception=None, azure=None):
     data = {
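The exception_mapping_worked flag is the core of this fix: if a provider error was successfully translated into one of the OpenAI-style classes, that mapped error is re-raised; if the mapping code itself blew up, the caller still receives the provider's original exception. Distilled to its control flow (a sketch, not the literal function; translate() is a hypothetical stand-in for the per-provider checks above):

def map_exception(model, original_exception):
    exception_mapping_worked = False
    try:
        mapped = translate(model, original_exception)  # hypothetical: the provider-specific checks in the diff
        if mapped is not None:
            exception_mapping_worked = True
            raise mapped
        raise original_exception  # base case: nothing matched
    except Exception as e:
        if exception_mapping_worked:
            raise e  # surface the mapped, OpenAI-style error
        raise original_exception  # never hide the provider error behind a mapping bug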
@@ -323,7 +394,6 @@ def litellm_telemetry(data):
         'uuid': uuid_value,
         'data': data
     }
-    print_verbose(f"payload: {payload}")
     try:
         # Make the POST request to localhost:3000
         response = requests.post('https://litellm.berri.ai/logging', json=payload)
setup.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='litellm',
+    version='0.1.231',
+    description='Library to easily interface with LLM API providers',
+    author='BerriAI',
+    packages=[
+        'litellm'
+    ],
+    package_data={
+        "litellm": ["integrations/*"], # Specify the directory path relative to your package
+    },
+    install_requires=[
+        'openai',
+        'cohere',
+        'pytest',
+        'anthropic',
+        'replicate',
+        'python-dotenv',
+        'openai[datalib]',
+        'tenacity'
+    ],
+)