bug fixes and updates

Krrish Dholakia 2023-08-02 13:27:10 -07:00
parent 74ddafb7ad
commit 104b9f21b0
17 changed files with 646 additions and 330 deletions


@@ -1,2 +1,31 @@
__version__ = "1.0.0"
from .main import * # Import all the symbols from main.py
success_callback = []
failure_callback = []
set_verbose = False
telemetry = True
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
'gpt-3.5-turbo',
'gpt-4'
]
open_ai_text_completion_models = [
'text-davinci-003'
]
cohere_models = [
'command-nightly',
]
anthropic_models = [
"claude-2",
"claude-instant-1"
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
from .timeout import timeout
from .utils import client, logging, exception_type # Import helper utilities from utils.py
from .main import * # Import all the symbols from main.py
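# Usage sketch (illustrative, not part of this diff): a consumer toggles these
# module-level settings before calling completion(); routing is driven by the
# model lists above. Assumes the relevant provider/callback env vars are set.
import litellm
from litellm import completion

litellm.set_verbose = True                      # turn on LiteLLM debug prints
litellm.success_callback = ["posthog"]          # assumes POSTHOG_API_KEY / POSTHOG_API_URL are set
litellm.failure_callback = ["slack", "sentry"]  # assumes the Slack / Sentry env vars are set

# "gpt-3.5-turbo" is in open_ai_chat_completion_models, so this routes to OpenAI chat
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey!"}])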


@@ -1,49 +1,77 @@
import os, openai, cohere, replicate, sys
from typing import Any
from func_timeout import func_set_timeout, FunctionTimedOut
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import json
import traceback
import threading
import dotenv
import subprocess
import litellm
from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
import random
####### ENVIRONMENT VARIABLES ###################
# Loading env variables using dotenv
dotenv.load_dotenv()
set_verbose = False
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
'gpt-3.5-turbo',
'gpt-4'
]
open_ai_text_completion_models = [
'text-davinci-003'
]
cohere_models = [
'command-nightly',
]
anthropic_models = [
"claude-2",
"claude-instant-1"
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
#############################################
dotenv.load_dotenv() # Loading env variables using dotenv
def get_optional_params(
# 12 optional params
functions = [],
function_call = "",
temperature = 1,
top_p = 1,
n = 1,
stream = False,
stop = None,
max_tokens = float('inf'),
presence_penalty = 0,
frequency_penalty = 0,
logit_bias = {},
user = "",
):
optional_params = {}
if functions != []:
optional_params["functions"] = functions
if function_call != "":
optional_params["function_call"] = function_call
if temperature != 1:
optional_params["temperature"] = temperature
if top_p != 1:
optional_params["top_p"] = top_p
if n != 1:
optional_params["n"] = n
if stream:
optional_params["stream"] = stream
if stop != None:
optional_params["stop"] = stop
if max_tokens != float('inf'):
optional_params["max_tokens"] = max_tokens
if presence_penalty != 0:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty != 0:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias != {}:
optional_params["logit_bias"] = logit_bias
if user != "":
optional_params["user"] = user
return optional_params
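# Quick sketch of the contract: only params that differ from their defaults are
# forwarded, keeping the provider payload minimal.
params = get_optional_params(temperature=0.2, stream=True)
assert params == {"temperature": 0.2, "stream": True}  # top_p=1, n=1, etc. are dropped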
####### COMPLETION ENDPOINTS ################
#############################################
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
@client
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
def completion(
model, messages, # required params
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
functions=[], function_call="", # optional params
temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
presence_penalty=0, frequency_penalty=0, logit_bias={}, user="",
# Optional liteLLM function params
*, force_timeout=60, azure=False, logger_fn=None, verbose=False
):
try:
# check if user passed in any of the OpenAI optional params
optional_params = get_optional_params(
functions=functions, function_call=function_call,
temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
)
if azure == True:
# azure configs
openai.api_type = "azure"
@@ -51,21 +79,49 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
engine=model,
messages = messages
messages = messages,
**optional_params
)
elif "replicate" in model:
elif model in litellm.open_ai_chat_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
model=model,
messages = messages,
**optional_params
)
elif model in litellm.open_ai_text_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
prompt = " ".join([message["content"] for message in messages])
## LOGGING
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.Completion.create(
model=model,
prompt = prompt
)
elif "replicate" in model:
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
# checking in case user set it to REPLICATE_API_KEY instead
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
prompt = " ".join([message["content"] for message in messages])
input = [{"prompt": prompt}]
if max_tokens:
input = {"prompt": prompt}
if max_tokens != float('inf'):
input["max_length"] = max_tokens # for t5 models
input["max_new_tokens"] = max_tokens # for llama2 models
## LOGGING
@@ -90,7 +146,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
]
}
response = new_response
elif model in anthropic_models:
elif model in litellm.anthropic_models:
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
prompt = f"{HUMAN_PROMPT}"
for message in messages:
@@ -103,9 +159,10 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
prompt += f"{HUMAN_PROMPT}{message['content']}"
prompt += f"{AI_PROMPT}"
anthropic = Anthropic()
if max_tokens:
# check if user passed in max_tokens != float('inf')
if max_tokens != float('inf'):
max_tokens_to_sample = max_tokens
else:
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
## LOGGING
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
@@ -127,9 +184,9 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
}
]
}
print(f"new response: {new_response}")
print_verbose(f"new response: {new_response}")
response = new_response
elif model in cohere_models:
elif model in litellm.cohere_models:
cohere_key = os.environ.get("COHERE_API_KEY")
co = cohere.Client(cohere_key)
prompt = " ".join([message["content"] for message in messages])
@@ -146,7 +203,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
"finish_reason": "stop",
"index": 0,
"message": {
"content": response[0],
"content": response[0].text,
"role": "assistant"
}
}
@@ -154,7 +211,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
}
response = new_response
elif model in open_ai_chat_completion_models:
elif model in litellm.open_ai_chat_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
@@ -166,7 +223,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
model=model,
messages = messages
)
elif model in open_ai_text_completion_models:
elif model in litellm.open_ai_text_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
@@ -181,249 +238,59 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
)
else:
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
args = locals()
raise ValueError(f"No valid completion model args passed in - {args}")
return response
except Exception as e:
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
raise e
# log the original exception
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
## Map to OpenAI Exception
raise exception_type(model=model, original_exception=e)
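# Illustrative call against the new signature (assumes ANTHROPIC_API_KEY is set):
# force_timeout overrides the decorator's 60s default, and any provider failure
# is re-raised as an OpenAI exception type via exception_type().
from openai.error import OpenAIError

try:
    response = completion(
        model="claude-2",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        max_tokens=100,
        force_timeout=120,
    )
except OpenAIError as e:
    print(f"mapped provider error: {e}")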
### EMBEDDING ENDPOINTS ####################
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
response = None
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, engine=model)
print_verbose(f"response_value: {str(response)[:50]}")
elif model in open_ai_embedding_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, model=model)
print_verbose(f"response_value: {str(response)[:50]}")
else:
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
return response
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
class litellm_client:
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
set_verbose = verbose
self.success_callback = success_callback
self.failure_callback = failure_callback
self.logger_fn = None # if user passes in their own logging function
self.callback_list = list(set(self.success_callback + self.failure_callback))
self.set_callbacks()
## COMPLETION CALL
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
## EMBEDDING CALL
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
def set_callbacks(self): #instantiate any external packages
for callback in self.callback_list: # only install what's required
if callback == "sentry":
try:
import sentry_sdk
except ImportError:
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
self.sentry_sdk = sentry_sdk
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
self.capture_exception = self.sentry_sdk.capture_exception
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
elif callback == "posthog":
try:
from posthog import Posthog
except:
print_verbose("Package 'posthog' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
from posthog import Posthog
self.posthog = Posthog(
project_api_key=os.environ.get("POSTHOG_API_KEY"),
host=os.environ.get("POSTHOG_API_URL"))
elif callback == "slack":
try:
from slack_bolt import App
except ImportError:
print_verbose("Package 'slack_bolt' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
from slack_bolt import App
self.slack_app = App(
token=os.environ.get("SLACK_API_TOKEN"),
signing_secret=os.environ.get("SLACK_API_SECRET")
)
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
def handle_input(self, model_call_details={}):
if len(model_call_details.keys()) > 0:
model = model_call_details["model"] if "model" in model_call_details else None
if model:
for callback in self.callback_list:
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
self.add_breadcrumb(
category=f'{model}',
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
level='info',
)
if self.logger_fn and callable(self.logger_fn):
self.logger_fn(model_call_details)
pass
def handle_success(self, model, messages, additional_details):
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["litellm_model"] = str(model)
additional_details["litellm_messages"] = str(messages)
for callback in self.success_callback:
try:
if callback == "posthog":
ph_obj = {}
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.success_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
pass
elif callback == "slack":
slack_msg = ""
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Successful call"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
except:
pass
if success_handler and callable(success_handler):
call_details = {
"model": model,
"messages": messages,
"additional_details": additional_details
}
success_handler(call_details)
pass
def handle_failure(self, exception, args):
args.pop("self")
additional_details = args.pop("additional_details", {})
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
for callback in self.failure_callback:
try:
if callback == "slack":
slack_msg = ""
for param in args:
slack_msg += f"{param}: {args[param]}\n"
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Traceback: {traceback.format_exc()}"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
elif callback == "sentry":
self.capture_exception(exception)
elif callback == "posthog":
if len(additional_details.keys()) > 0:
ph_obj = {}
for param in args:
ph_obj[param] += args[param]
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
else:
pass
except:
print(f"got an error calling {callback} - {traceback.format_exc()}")
if failure_handler and callable(failure_handler):
call_details = {
"exception": exception,
"additional_details": additional_details
}
failure_handler(call_details)
pass
####### HELPER FUNCTIONS ################
#Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
@client
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
try:
model_call_details = {}
model_call_details["model"] = model
model_call_details["input"] = input
model_call_details["azure"] = azure
model_call_details["additional_args"] = additional_args
if logger_fn and callable(logger_fn):
try:
# log additional call details -> api key, etc.
if azure == True or model in open_ai_chat_completion_models or model in open_ai_text_completion_models or model in open_ai_embedding_models:
model_call_details["api_type"] = openai.api_type
model_call_details["api_base"] = openai.api_base
model_call_details["api_version"] = openai.api_version
model_call_details["api_key"] = openai.api_key
elif "replicate" in model:
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
elif model in anthropic_models:
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
elif model in cohere_models:
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
except:
print_verbose(f"Basic model call details: {model_call_details}")
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
pass
else:
print_verbose(f"Basic model call details: {model_call_details}")
pass
except:
pass
## Set verbose to true -> ```litellm.verbose = True```
response = None
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, engine=model)
print_verbose(f"response_value: {str(response)[:50]}")
elif model in litellm.open_ai_embedding_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, model=model)
print_verbose(f"response_value: {str(response)[:50]}")
else:
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
args = locals()
raise ValueError(f"No valid embedding model args passed in - {args}")
return response
except Exception as e:
# log the original exception
logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
## Map to OpenAI Exception
raise exception_type(model=model, original_exception=e)
####### HELPER FUNCTIONS ################
## Set verbose to true -> ```litellm.set_verbose = True```
def print_verbose(print_statement):
if set_verbose:
if litellm.set_verbose:
print(f"LiteLLM: {print_statement}")
print("Get help - https://discord.com/invite/wuPM9dRgDw")
if random.random() <= 0.3:
print("Get help - https://discord.com/invite/wuPM9dRgDw")


@@ -0,0 +1,80 @@
"""
Module containing "timeout" decorator for sync and async callables.
"""
import asyncio
from concurrent import futures
from inspect import iscoroutinefunction
from functools import wraps
from threading import Thread
from openai.error import Timeout
def timeout(
timeout_duration: float = None, exception_to_raise = Timeout
):
"""
Wraps a function to raise the specified exception if execution time
is greater than the specified timeout.
Works with both synchronous and asynchronous callables, though synchronous ones
incur some overhead because the backend uses threads and asyncio.
:param float timeout_duration: Timeout duration in seconds. If None, the callable won't time out.
:param OpenAIError exception_to_raise: Exception to raise when the callable times out.
Defaults to openai.error.Timeout.
:return: The decorated function.
:rtype: callable
"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
async def async_func():
return func(*args, **kwargs)
thread = _LoopWrapper()
thread.start()
future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
try:
local_timeout_duration = timeout_duration
if "force_timeout" in kwargs:
local_timeout_duration = kwargs["force_timeout"]
result = future.result(timeout=local_timeout_duration)
except futures.TimeoutError:
thread.stop_loop()
raise exception_to_raise()
thread.stop_loop()
return result
@wraps(func)
async def async_wrapper(*args, **kwargs):
try:
value = await asyncio.wait_for(
func(*args, **kwargs), timeout=timeout_duration
)
return value
except asyncio.TimeoutError:
raise exception_to_raise()
if iscoroutinefunction(func):
return async_wrapper
return wrapper
return decorator
class _LoopWrapper(Thread):
def __init__(self):
super().__init__(daemon=True)
self.loop = asyncio.new_event_loop()
def run(self) -> None:
self.loop.run_forever()
self.loop.call_soon_threadsafe(self.loop.close)
def stop_loop(self):
for task in asyncio.all_tasks(self.loop):
task.cancel()
self.loop.call_soon_threadsafe(self.loop.stop)
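# Usage sketch for the decorator. `slow_call` is a hypothetical function; note
# the sync wrapper forwards **kwargs to the wrapped callable, so it should
# accept (or absorb) a force_timeout keyword.
import time
from openai.error import Timeout
from litellm import timeout  # re-exported by the __init__.py above

@timeout(2)  # raise openai.error.Timeout once execution passes 2 seconds
def slow_call(force_timeout=None):
    time.sleep(5)
    return "done"

try:
    slow_call(force_timeout=1)  # per-call override, read out of kwargs by the wrapper
except Timeout:
    print("timed out")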

build/lib/litellm/utils.py Normal file

@@ -0,0 +1,316 @@
import dotenv, json, traceback, threading
import subprocess, os, sys  # sys is needed for sys.executable in set_callbacks
import litellm, openai
import random, uuid, requests
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
sentry_sdk_instance = None
capture_exception = None
add_breadcrumb = None
posthog = None
slack_app = None
alerts_channel = None
callback_list = []
user_logger_fn = None
additional_details = {}
def print_verbose(print_statement):
if litellm.set_verbose:
print(f"LiteLLM: {print_statement}")
if random.random() <= 0.3:
print("Get help - https://discord.com/invite/wuPM9dRgDw")
####### LOGGING ###################
#Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
try:
model_call_details = {}
model_call_details["model"] = model
model_call_details["azure"] = azure
# log exception details
if exception:
model_call_details["original_exception"] = exception
if litellm.telemetry:
safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
model_call_details["input"] = input
# log additional call details -> api key, etc.
if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_text_completion_models or model in litellm.open_ai_embedding_models:
model_call_details["api_type"] = openai.api_type
model_call_details["api_base"] = openai.api_base
model_call_details["api_version"] = openai.api_version
model_call_details["api_key"] = openai.api_key
elif "replicate" in model:
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
elif model in litellm.anthropic_models:
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
elif model in litellm.cohere_models:
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
model_call_details["additional_args"] = additional_args
## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
print_verbose(f"Basic model call details: {model_call_details}")
if logger_fn and callable(logger_fn):
try:
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
except:
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
except:
traceback.print_exc()
pass
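# Any callable that accepts a single dict works as logger_fn; a minimal
# hypothetical hook, passed through completion()/embedding() via the
# logger_fn kwarg:
def my_logger_fn(model_call_details):
    # receives model, input, azure flag, api details and additional_args
    print(f"[my app] calling {model_call_details['model']}")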
####### CLIENT ###################
# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
def client(original_function):
def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
try:
global callback_list, add_breadcrumb
if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
callback_list = list(set(litellm.success_callback + litellm.failure_callback))
set_callbacks(callback_list=callback_list)
if add_breadcrumb:
add_breadcrumb(
category="litellm.llm_call",
message=f"Positional Args: {args}, Keyword Args: {kwargs}",
level="info",
)
except: # DO NOT BLOCK running the function because of this
print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
pass
def wrapper(*args, **kwargs):
try:
function_setup(args, kwargs)
## MODEL CALL
result = original_function(*args, **kwargs)
## LOG SUCCESS
my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
my_thread.start()
return result
except Exception as e:
traceback_exception = traceback.format_exc()
my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
my_thread.start()
raise e
return wrapper
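# Toy sketch of the wrapper flow; `fake_llm_call` is hypothetical. On success,
# handle_success(args, kwargs) runs on a background thread; on failure,
# handle_failure(...) fires and the original exception is re-raised.
@client
def fake_llm_call(model, messages):
    return {"choices": [{"message": {"role": "assistant", "content": "hi"}}]}

result = fake_llm_call("gpt-3.5-turbo", [{"role": "user", "content": "hi"}])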
####### HELPER FUNCTIONS ################
def set_callbacks(callback_list):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
try:
for callback in callback_list:
if callback == "sentry":
try:
import sentry_sdk
except ImportError:
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
sentry_sdk_instance = sentry_sdk
sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
capture_exception = sentry_sdk_instance.capture_exception
add_breadcrumb = sentry_sdk_instance.add_breadcrumb
elif callback == "posthog":
try:
from posthog import Posthog
except ImportError:
print_verbose("Package 'posthog' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
from posthog import Posthog
posthog = Posthog(
project_api_key=os.environ.get("POSTHOG_API_KEY"),
host=os.environ.get("POSTHOG_API_URL"))
elif callback == "slack":
try:
from slack_bolt import App
except ImportError:
print_verbose("Package 'slack_bolt' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
from slack_bolt import App
slack_app = App(
token=os.environ.get("SLACK_API_TOKEN"),
signing_secret=os.environ.get("SLACK_API_SECRET")
)
alerts_channel = os.environ["SLACK_API_CHANNEL"]
print_verbose(f"Initialized Slack App: {slack_app}")
except:
pass
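# Configuration each callback expects, collected from set_callbacks above
# (placeholder values; set these however your deployment manages secrets):
import os
os.environ["SENTRY_API_URL"] = "<sentry-dsn>"
os.environ["SENTRY_API_TRACE_RATE"] = "1.0"
os.environ["POSTHOG_API_KEY"] = "<posthog-key>"
os.environ["POSTHOG_API_URL"] = "<posthog-host>"
os.environ["SLACK_API_TOKEN"] = "<bot-token>"
os.environ["SLACK_API_SECRET"] = "<signing-secret>"
os.environ["SLACK_API_CHANNEL"] = "<channel-id>"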
def handle_failure(exception, traceback_exception, args, kwargs):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
try:
print_verbose(f"handle_failure args: {args}")
print_verbose(f"handle_failure kwargs: {kwargs}")
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
print_verbose(f"self.failure_callback: {litellm.failure_callback}")
print_verbose(f"additional_details: {additional_details}")
for callback in litellm.failure_callback:
try:
if callback == "slack":
slack_msg = ""
if len(kwargs) > 0:
for key in kwargs:
slack_msg += f"{key}: {kwargs[key]}\n"
if len(args) > 0:
for i, arg in enumerate(args):
slack_msg += f"LiteLLM_Args_{str(i)}: {arg}"
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Traceback: {traceback_exception}"
slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
elif callback == "sentry":
capture_exception(exception)
elif callback == "posthog":
print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
ph_obj = {}
if len(kwargs) > 0:
ph_obj = kwargs
if len(args) > 0:
for i, arg in enumerate(args):
ph_obj["litellm_args_" + str(i)] = arg
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["Event_Name"]
print_verbose(f"ph_obj: {ph_obj}")
print_verbose(f"PostHog Event Name: {event_name}")
if "user_id" in additional_details:
posthog.capture(additional_details["user_id"], event_name, ph_obj)
else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
unique_id = str(uuid.uuid4())
posthog.capture(unique_id, event_name)
print_verbose(f"successfully logged to PostHog!")
except:
print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
pass
if failure_handler and callable(failure_handler):
call_details = {
"exception": exception,
"additional_details": additional_details
}
failure_handler(call_details)
pass
except:
pass
def handle_success(*args, **kwargs):
try:
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
for callback in litellm.success_callback:
try:
if callback == "posthog":
ph_obj = {}
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["Event_Name"]
if "user_id" in additional_details:
posthog.capture(additional_details["user_id"], event_name, ph_obj)
else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
unique_id = str(uuid.uuid4())
posthog.capture(unique_id, event_name, ph_obj)
pass
elif callback == "slack":
slack_msg = ""
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
except:
pass
if success_handler and callable(success_handler):
success_handler(args, kwargs)
pass
except:
pass
def exception_type(model, original_exception):
try:
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
else:
exception_type = ""
if "claude" in model: #one of the anthropics
if "status_code" in original_exception:
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
raise original_exception
except:
raise original_exception
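# Downstream, callers can handle every provider through the OpenAI error types;
# an illustrative pattern:
from litellm import completion
from openai.error import AuthenticationError, RateLimitError

try:
    response = completion(model="claude-2", messages=[{"role": "user", "content": "Hey!"}])
except AuthenticationError:
    pass  # a missing/invalid ANTHROPIC_API_KEY surfaces as an OpenAI-style auth error
except RateLimitError:
    pass  # Anthropic 429s (and Cohere connection errors) are mapped here too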
def safe_crash_reporting(model=None, exception=None, azure=None):
data = {
"model": model,
"exception": str(exception),
"azure": azure
}
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
def litellm_telemetry(data):
# Load or generate the UUID
uuid_file = 'litellm_uuid.txt'
try:
# Try to open the file and load the UUID
with open(uuid_file, 'r') as file:
uuid_value = file.read()
if uuid_value:
uuid_value = uuid_value.strip()
else:
raise FileNotFoundError
except FileNotFoundError:
# Generate a new UUID if the file doesn't exist or is empty
new_uuid = uuid.uuid4()
uuid_value = str(new_uuid)
with open(uuid_file, 'w') as file:
file.write(uuid_value)
# Prepare the data to send to localhost:3000
payload = {
'uuid': uuid_value,
'data': data
}
print_verbose(f"payload: {payload}")
try:
# Make the POST request to localhost:3000
response = requests.post('https://litellm.berri.ai/logging', json=payload)
response.raise_for_status() # Raise an exception for HTTP errors
except requests.exceptions.RequestException as e:
# Handle any errors in the request
pass
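# Telemetry is on by default (litellm.telemetry = True in __init__.py) and only
# fires from logging() when an exception is recorded; opting out is one flag:
import litellm
litellm.telemetry = False  # logging() then skips safe_crash_reporting() entirely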

Binary file not shown.

Binary file not shown.

dist/litellm-0.1.216-py3-none-any.whl vendored Normal file

Binary file not shown.

dist/litellm-0.1.216.tar.gz vendored Normal file

Binary file not shown.


@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: litellm
Version: 0.1.207
Version: 0.1.216
Summary: Library to easily interface with LLM API providers
Author: BerriAI
License-File: LICENSE


@@ -1,5 +1,6 @@
LICENSE
README.md
pyproject.toml
setup.py
litellm/__init__.py
litellm/main.py


@@ -57,3 +57,4 @@ def test_good_azure_embedding():
print(f"response: {str(response)[:50]}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")


@@ -0,0 +1,23 @@
#### What this tests ####
# This tests error logging (with custom user functions) for the `completion` + `embedding` endpoints without callbacks (i.e. slack, posthog, etc. not set)
# Requirements: remove any env keys you have related to slack/posthog/etc., and remove your anthropic api key (to force an exception)
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")


@@ -234,41 +234,45 @@ def handle_success(*args, **kwargs):
def exception_type(model, original_exception):
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
try:
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
else:
exception_type = ""
if "claude" in model: #one of the anthropics
if "status_code" in original_exception:
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
exception_type = ""
if "claude" in model: #one of the anthropics
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
raise original_exception
except:
raise original_exception
def safe_crash_reporting(model=None, exception=None, azure=None):
@@ -277,11 +281,9 @@ def safe_crash_reporting(model=None, exception=None, azure=None):
"exception": str(exception),
"azure": azure
}
print(f"data in crash reporting: {data}")
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
def litellm_telemetry(data):
print(f"data in in litellm telemetry: {data}")
# Load or generate the UUID
uuid_file = 'litellm_uuid.txt'
try:
@@ -290,7 +292,6 @@ def litellm_telemetry(data):
uuid_value = file.read()
if uuid_value:
uuid_value = uuid_value.strip()
print(f"Loaded UUID: {uuid_value}")
else:
raise FileNotFoundError
except FileNotFoundError:
@@ -299,7 +300,6 @@ def litellm_telemetry(data):
uuid_value = str(new_uuid)
with open(uuid_file, 'w') as file:
file.write(uuid_value)
print(f"Generated and stored UUID: {uuid_value}")
# Prepare the data to send to localhost:3000
payload = {
@@ -311,7 +311,6 @@ def litellm_telemetry(data):
# Make the POST request to localhost:3000
response = requests.post('https://litellm.berri.ai/logging', json=payload)
response.raise_for_status() # Raise an exception for HTTP errors
print('Request successfully sent!')
except requests.exceptions.RequestException as e:
# Handle any errors in the request
print(f'Error: {e}')
pass

View file

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='litellm',
version='0.1.214',
version='0.1.216',
description='Library to easily interface with LLM API providers',
author='BerriAI',
packages=[