bug fixes and updates

2023-08-02 13:27:10 -07:00 · 2023-08-02 13:27:10 -07:00 · 85f1591413
commit 85f1591413
parent efe82c6bb6
17 changed files with 646 additions and 330 deletions
--- a/build/lib/litellm/init.py
+++ b/build/lib/litellm/init.py
@ -1,2 +1,31 @@
-__version__ = "1.0.0"
+success_callback = []
 # Names of the integrations to notify when a call fails; iterated by handle_failure() in utils.py
 failure_callback = []
 # When True, print_verbose() prints its "LiteLLM: ..." debug lines
 set_verbose=False
 # When True, logging() in utils.py calls safe_crash_reporting(); set to False to turn that off
 telemetry=True
 ####### COMPLETION MODELS ###################
 # Model names that completion() in main.py sends to openai.ChatCompletion
 open_ai_chat_completion_models = [
  'gpt-3.5-turbo', 
  'gpt-4'
 ]
 # Model names that completion() in main.py sends to openai.Completion
 open_ai_text_completion_models = [
    'text-davinci-003'
 ]
 # Model names that completion() in main.py sends to the Cohere client
 cohere_models = [
    'command-nightly',
 ]
 # Model names that completion() in main.py sends to the Anthropic client
 anthropic_models = [
  "claude-2", 
  "claude-instant-1"
 ]
 ####### EMBEDDING MODELS ###################
 # Model names that embedding() in main.py sends to openai.Embedding
 open_ai_embedding_models = [
    'text-embedding-ada-002'
 ]
 from .timeout import timeout
 from .utils import client, logging, exception_type  # Import the client decorator and the logging / exception-mapping helpers from utils.py
 from .main import *  # Import all the symbols from main.py
--- a/build/lib/litellm/main.py
+++ b/build/lib/litellm/main.py
@ -1,49 +1,77 @@
 import os, openai, cohere, replicate, sys
 from typing import Any
 from func_timeout import func_set_timeout, FunctionTimedOut
 from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
 import json
 import traceback
 import threading
 import dotenv
 import traceback
-import subprocess
+import litellm
 from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
 import random
 ####### ENVIRONMENT VARIABLES ###################
-# Loading env variables using dotenv
+dotenv.load_dotenv() # Loading env variables using dotenv
 dotenv.load_dotenv()
 set_verbose = False
 ####### COMPLETION MODELS ###################
 # Module-local copies holding the same values as the lists in the package __init__.
 # NOTE(review): completion() and embedding() in this file test membership against the
 # `litellm.*` lists, not these bare names - confirm whether these copies are still needed.
 open_ai_chat_completion_models = [
  'gpt-3.5-turbo', 
  'gpt-4'
 ]
 open_ai_text_completion_models = [
    'text-davinci-003'
 ]
 cohere_models = [
    'command-nightly',
 ]
 anthropic_models = [
  "claude-2", 
  "claude-instant-1"
 ]
 ####### EMBEDDING MODELS ###################
 open_ai_embedding_models = [
    'text-embedding-ada-002'
 ]
 #############################################
 def get_optional_params(
     # 12 optional params
     functions = [],
     function_call = "",
     temperature = 1,
     top_p = 1,
     n = 1,
     stream = False,
     stop = None,
     max_tokens = float('inf'),
     presence_penalty = 0,
     frequency_penalty = 0,
     logit_bias = {},
     user = "",
 ):
     """Return only the optional params whose value differs from its default.
     Every argument is compared with the default shown in the signature and is
     copied into the result only when the caller changed it; `stream` is kept
     whenever it is truthy. Keys are inserted in signature order, so the dict
     can be splatted straight into the provider call.
     """
     # (key, value, "was it set?") triples, listed in signature order
     candidates = (
         ("functions", functions, functions != []),
         ("function_call", function_call, function_call != ""),
         ("temperature", temperature, temperature != 1),
         ("top_p", top_p, top_p != 1),
         ("n", n, n != 1),
         ("stream", stream, stream),
         ("stop", stop, stop != None),
         ("max_tokens", max_tokens, max_tokens != float('inf')),
         ("presence_penalty", presence_penalty, presence_penalty != 0),
         ("frequency_penalty", frequency_penalty, frequency_penalty != 0),
         ("logit_bias", logit_bias, logit_bias != {}),
         ("user", user, user != ""),
     )
     return {key: value for key, value, was_set in candidates if was_set}
 ####### COMPLETION ENDPOINTS ################
 #############################################
-@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
+@client
-def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
+@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
    model, messages, # required params
    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
    functions=[], function_call="", # optional params
    temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
    presence_penalty=0, frequency_penalty=0, logit_bias={}, user="",
    # Optional liteLLM function params
    *, force_timeout=60, azure=False, logger_fn=None, verbose=False
  ):
  try:
    # check if user passed in any of the OpenAI optional params
    optional_params = get_optional_params(
      functions=functions, function_call=function_call, 
      temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
      presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
    )
    if azure == True:
      # azure configs
      openai.api_type = "azure"
@ -51,11 +79,39 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
      openai.api_version = os.environ.get("AZURE_API_VERSION")
      openai.api_key = os.environ.get("AZURE_API_KEY")
      ## LOGGING
-      logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
+      logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
      ## COMPLETION CALL
      response = openai.ChatCompletion.create(
        engine=model,
-        messages = messages
+        messages = messages,
        **optional_params
      )
    elif model in litellm.open_ai_chat_completion_models:
      openai.api_type = "openai"
      openai.api_base = "https://api.openai.com/v1"
      openai.api_version = None
      openai.api_key = os.environ.get("OPENAI_API_KEY")
      ## LOGGING
      logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
      ## COMPLETION CALL
      response = openai.ChatCompletion.create(
        model=model,
        messages = messages,
        **optional_params
      )
    elif model in litellm.open_ai_text_completion_models:
      openai.api_type = "openai"
      openai.api_base = "https://api.openai.com/v1"
      openai.api_version = None
      openai.api_key = os.environ.get("OPENAI_API_KEY")
      prompt = " ".join([message["content"] for message in messages])
      ## LOGGING
      logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
      ## COMPLETION CALL
      response = openai.Completion.create(
          model=model,
          prompt = prompt
      )
    elif "replicate" in model:
      # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
@ -64,8 +120,8 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
        replicate_api_token = os.environ.get("REPLICATE_API_KEY")
        os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
      prompt = " ".join([message["content"] for message in messages])
-      input = [{"prompt": prompt}]
+      input = {"prompt": prompt}
-      if max_tokens:
+      if max_tokens != float('inf'):
        input["max_length"] = max_tokens # for t5 models 
        input["max_new_tokens"] = max_tokens # for llama2 models 
      ## LOGGING
@ -90,7 +146,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
        ]
      }
      response = new_response
-    elif model in anthropic_models:
+    elif model in litellm.anthropic_models:
      #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
      prompt = f"{HUMAN_PROMPT}" 
      for message in messages:
@ -103,7 +159,8 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
          prompt += f"{HUMAN_PROMPT}{message['content']}"
      prompt += f"{AI_PROMPT}"
      anthropic = Anthropic()
-      if max_tokens:
+      # check if user passed in max_tokens != float('inf')
      if max_tokens != float('inf'):
        max_tokens_to_sample = max_tokens
      else:
        max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
@ -127,9 +184,9 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
          }
        ]
      }
-      print(f"new response: {new_response}")
+      print_verbose(f"new response: {new_response}")
      response = new_response
-    elif model in cohere_models:
+    elif model in litellm.cohere_models:
      cohere_key = os.environ.get("COHERE_API_KEY")
      co = cohere.Client(cohere_key)
      prompt = " ".join([message["content"] for message in messages])
@ -146,7 +203,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
                  "finish_reason": "stop",
                  "index": 0,
                  "message": {
-                      "content": response[0],
+                      "content": response[0].text,
                      "role": "assistant"
                  }
              }
@ -154,7 +211,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
      }
      response = new_response
-    elif model in open_ai_chat_completion_models:
+    elif model in litellm.open_ai_chat_completion_models:
      openai.api_type = "openai"
      openai.api_base = "https://api.openai.com/v1"
      openai.api_version = None
@ -166,7 +223,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
          model=model,
          messages = messages
      )
-    elif model in open_ai_text_completion_models:
+    elif model in litellm.open_ai_text_completion_models:
      openai.api_type = "openai"
      openai.api_base = "https://api.openai.com/v1"
      openai.api_version = None
@ -181,15 +238,21 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
      )
    else: 
      logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
      args = locals()
      raise ValueError(f"No valid completion model args passed in - {args}")
    return response
  except Exception as e:
-    logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
+    # log the original exception
-    raise e
+    logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
    ## Map to OpenAI Exception
    raise exception_type(model=model, original_exception=e)
 ### EMBEDDING ENDPOINTS ####################
-@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
+@client
-def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
+@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
  try:
    response = None
    if azure == True:
      # azure configs
@ -202,7 +265,7 @@ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
      ## EMBEDDING CALL
      response = openai.Embedding.create(input=input, engine=model)
      print_verbose(f"response_value: {str(response)[:50]}")
-  elif model in open_ai_embedding_models:
+    elif model in litellm.open_ai_embedding_models:
      openai.api_type = "openai"
      openai.api_base = "https://api.openai.com/v1"
      openai.api_version = None
@ -214,216 +277,20 @@ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
      print_verbose(f"response_value: {str(response)[:50]}")
    else: 
      logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
      args = locals()
      raise ValueError(f"No valid embedding model args passed in - {args}")
    return response
 ### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
 class litellm_client:
  def __init__(self, success_callback=[], failure_callback=[], verbose=False):  # Constructor
      set_verbose = verbose
      self.success_callback = success_callback
      self.failure_callback = failure_callback
      self.logger_fn = None # if user passes in their own logging function
      self.callback_list = list(set(self.success_callback + self.failure_callback))
      self.set_callbacks()
  ## COMPLETION CALL 
  def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
    try:
      self.logger_fn = logger_fn
      response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
      my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
      my_thread.start()
      return response
  except Exception as e:
-      args = locals() # get all the param values
+    # log the original exception
-      self.handle_failure(e, args)
+    logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
-      raise e
+    ## Map to OpenAI Exception
-
+    raise exception_type(model=model, original_exception=e)
  ## EMBEDDING CALL 
  def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
    try:
      self.logger_fn = logger_fn
      response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
      my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
      my_thread.start()
      return response
    except Exception as e:
      args = locals() # get all the param values 
      self.handle_failure(e, args)
      raise e
  def set_callbacks(self):  #instantiate any external packages
    for callback in self.callback_list: # only install what's required
      if callback == "sentry":
        try:
          import sentry_sdk
        except ImportError:
          print_verbose("Package 'sentry_sdk' is missing. Installing it...")
          subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
          import sentry_sdk
        self.sentry_sdk = sentry_sdk
        self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
        self.capture_exception = self.sentry_sdk.capture_exception
        self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
      elif callback == "posthog":
        try:
          from posthog import Posthog
        except:
          print_verbose("Package 'posthog' is missing. Installing it...")
          subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
          from posthog import Posthog
        self.posthog = Posthog(
            project_api_key=os.environ.get("POSTHOG_API_KEY"),
            host=os.environ.get("POSTHOG_API_URL"))
      elif callback == "slack":
        try:
          from slack_bolt import App
        except ImportError:
          print_verbose("Package 'slack_bolt' is missing. Installing it...")
          subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
          from slack_bolt import App
        self.slack_app = App(
          token=os.environ.get("SLACK_API_TOKEN"),
          signing_secret=os.environ.get("SLACK_API_SECRET")
        )
        self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
  def handle_input(self, model_call_details={}):
      if len(model_call_details.keys()) > 0:
        model = model_call_details["model"] if "model" in model_call_details else None
        if model:
          for callback in self.callback_list:
            if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
              self.add_breadcrumb(
                category=f'{model}',
                message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
                level='info',
              )
          if self.logger_fn and callable(self.logger_fn):
            self.logger_fn(model_call_details)
      pass
  def handle_success(self, model, messages, additional_details):
    success_handler = additional_details.pop("success_handler", None)
    failure_handler = additional_details.pop("failure_handler", None)
    additional_details["litellm_model"] = str(model)
    additional_details["litellm_messages"] = str(messages)
    for callback in self.success_callback:
      try:
        if callback == "posthog":
          ph_obj = {}
          for detail in additional_details:
            ph_obj[detail] = additional_details[detail]
          event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query"
          if "user_id" in additional_details:
            self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
          else: 
            self.posthog.capture(event_name, ph_obj)
          pass
        elif callback == "slack":
          slack_msg = "" 
          if len(additional_details.keys()) > 0:
            for detail in additional_details: 
              slack_msg += f"{detail}: {additional_details[detail]}\n"
          slack_msg += f"Successful call"
          self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
      except:
        pass
    if success_handler and callable(success_handler):
      call_details = {
        "model": model,
        "messages": messages,
        "additional_details": additional_details
      }
      success_handler(call_details)
    pass
  def handle_failure(self, exception, args):
    args.pop("self")
    additional_details = args.pop("additional_details", {})
    success_handler = additional_details.pop("success_handler", None)
    failure_handler = additional_details.pop("failure_handler", None)
    for callback in self.failure_callback:
      try:
        if callback == "slack":
          slack_msg = "" 
          for param in args: 
            slack_msg += f"{param}: {args[param]}\n"
          if len(additional_details.keys()) > 0:
            for detail in additional_details: 
              slack_msg += f"{detail}: {additional_details[detail]}\n"
          slack_msg += f"Traceback: {traceback.format_exc()}"
          self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
        elif callback == "sentry":
          self.capture_exception(exception)
        elif callback == "posthog":
          if len(additional_details.keys()) > 0:
            ph_obj = {}
            for param in args: 
              ph_obj[param] += args[param]
            for detail in additional_details:
              ph_obj[detail] = additional_details[detail]
            event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
            if "user_id" in additional_details:
              self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
            else: 
              self.posthog.capture(event_name, ph_obj)
          else: 
            pass
      except:
        print(f"got an error calling {callback} - {traceback.format_exc()}")
    if failure_handler and callable(failure_handler):
      call_details = {
        "exception": exception,
        "additional_details": additional_details
      }
      failure_handler(call_details)
    pass
 ####### HELPER FUNCTIONS ################
-
+## Set verbose to true -> ```litellm.set_verbose = True```    
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
 def logging(model, input, azure=False, additional_args={}, logger_fn=None):
  try:
    model_call_details = {}
    model_call_details["model"] = model
    model_call_details["input"] = input
    model_call_details["azure"] = azure
    model_call_details["additional_args"] = additional_args
    if logger_fn and callable(logger_fn):
      try:
        # log additional call details -> api key, etc. 
        if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models:
          model_call_details["api_type"] = openai.api_type
          model_call_details["api_base"] = openai.api_base
          model_call_details["api_version"] = openai.api_version
          model_call_details["api_key"] = openai.api_key
        elif "replicate" in model:
          model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
        elif model in anthropic_models:
          model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
        elif model in cohere_models:
          model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
        logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
      except:
        print_verbose(f"Basic model call details: {model_call_details}")
        print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
        pass
    else:
      print_verbose(f"Basic model call details: {model_call_details}")
      pass
  except:
    pass
 ## Set verbose to true -> ```litellm.verbose = True```    
 def print_verbose(print_statement):
-  if set_verbose:
+  if litellm.set_verbose:
    print(f"LiteLLM: {print_statement}")
    if random.random() <= 0.3:
      print("Get help - https://discord.com/invite/wuPM9dRgDw")
--- a/build/lib/litellm/timeout.py
+++ b/build/lib/litellm/timeout.py
@ -0,0 +1,80 @@
 """
 Module containing "timeout" decorator for sync and async callables.
 """
 import asyncio
 from concurrent import futures
 from inspect import iscoroutinefunction
 from functools import wraps
 from threading import Thread
 from openai.error import Timeout
 def timeout(
     timeout_duration: float = None, exception_to_raise = Timeout
 ):
     """
     Wraps a function to raise the specified exception if execution time
     is greater than the specified timeout.
     Works with both synchronous and asynchronous callables, but with synchronous ones will introduce
     some overhead due to the backend use of threads and asyncio.
     A `force_timeout` keyword argument passed to the decorated callable overrides
     `timeout_duration` for that single call (it is still forwarded to the callable).
         :param float timeout_duration: Timeout duration in seconds. If none callable won't time out.
         :param OpenAIError exception_to_raise: Exception to raise when the callable times out.
             Defaults to `openai.error.Timeout`.
         :return: The decorated function.
         :rtype: callable
     """
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
             async def async_func():
                 return func(*args, **kwargs)
             # a per-call override wins over the decorator-level default
             local_timeout_duration = kwargs.get("force_timeout", timeout_duration)
             thread = _LoopWrapper()
             thread.start()
             future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
             try:
                 return future.result(timeout=local_timeout_duration)
             except futures.TimeoutError:
                 raise exception_to_raise()
             finally:
                 # always release the helper loop, including when func itself raised
                 thread.stop_loop()
         @wraps(func)
         async def async_wrapper(*args, **kwargs):
             # honour the same per-call override as the synchronous path
             local_timeout_duration = kwargs.get("force_timeout", timeout_duration)
             try:
                 return await asyncio.wait_for(
                     func(*args, **kwargs), timeout=local_timeout_duration
                 )
             except asyncio.TimeoutError:
                 raise exception_to_raise()
         if iscoroutinefunction(func):
             return async_wrapper
         return wrapper
     return decorator
 class _LoopWrapper(Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self.loop = asyncio.new_event_loop()
    def run(self) -> None:
        self.loop.run_forever()
        self.loop.call_soon_threadsafe(self.loop.close)
    def stop_loop(self):
        for task in asyncio.all_tasks(self.loop):
            task.cancel()
        self.loop.call_soon_threadsafe(self.loop.stop)
--- a/build/lib/litellm/utils.py
+++ b/build/lib/litellm/utils.py
@ -0,0 +1,316 @@
 import dotenv, json, traceback, threading
 import subprocess, os 
 import litellm, openai 
 import random, uuid, requests
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 sentry_sdk_instance = None
 capture_exception = None
 add_breadcrumb = None
 posthog = None
 slack_app = None
 alerts_channel = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
 def print_verbose(print_statement):
     """Print a "LiteLLM: ..." debug line when `litellm.set_verbose` is on.
     Roughly three times out of ten the line is followed by a pointer to the help channel.
     """
     if not litellm.set_verbose:
         return
     print(f"LiteLLM: {print_statement}")
     show_help_link = random.random() <= 0.3
     if show_help_link:
         print("Get help - https://discord.com/invite/wuPM9dRgDw")
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
 def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
     """Collect the details of a model call and pass them to the user's logger (non-blocking).
     Builds a dict with the model, input, azure flag, any exception, provider
     credentials and `additional_args`, prints it through print_verbose(), and
     calls `logger_fn` with it when one is given. Errors raised while logging are
     reported but never propagated to the caller.
     NOTE: the dict includes the raw provider API key.
         :param model: model name used for the call.
         :param input: messages / prompt / embedding input sent to the model.
         :param azure: True when the call goes through Azure OpenAI.
         :param additional_args: extra call details to attach as-is (not modified here).
         :param logger_fn: optional callable that accepts the details dict.
         :param exception: the original exception when logging a failed call.
     """
     try:
         model_call_details = {}
         model_call_details["model"] = model
         model_call_details["azure"] = azure
         # log exception details
         if exception:
             model_call_details["original_exception"] = exception
         if litellm.telemetry:
             safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
         model_call_details["input"] = input
         # log additional call details -> api key, etc.
         # every call that goes through the openai SDK: azure, chat, text completion and embedding models
         uses_openai_sdk = (
             azure == True
             or model in litellm.open_ai_chat_completion_models
             or model in litellm.open_ai_text_completion_models
             or model in litellm.open_ai_embedding_models
         )
         if uses_openai_sdk:
             model_call_details["api_type"] = openai.api_type
             model_call_details["api_base"] = openai.api_base
             model_call_details["api_version"] = openai.api_version
             model_call_details["api_key"] = openai.api_key
         elif "replicate" in model:
             model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
         elif model in litellm.anthropic_models:
             model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
         elif model in litellm.cohere_models:
             model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
         model_call_details["additional_args"] = additional_args
         ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
         print_verbose(f"Basic model call details: {model_call_details}")
         if logger_fn and callable(logger_fn):
             try:
                 logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
             except Exception:
                 print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
     except Exception:
         # logging must never break the model call itself
         traceback.print_exc()
 ####### CLIENT ################### 
 # make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
 def client(original_function):
     """Decorator that adds callback setup and success/failure reporting around a call.
     Before the wrapped call, the configured callbacks are initialised (once) and a
     breadcrumb is recorded when a breadcrumb function is available. After the call,
     handle_success or handle_failure runs on a separate thread so reporting never
     delays the caller. Exceptions from the wrapped function are re-raised unchanged.
         :param original_function: the callable to wrap.
         :return: the wrapped callable, carrying the original's name and docstring.
     """
     from functools import wraps # local import: functools is not among this module's top-level imports
     def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
         global callback_list, add_breadcrumb
         try:
             if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
                 callback_list = list(set(litellm.success_callback + litellm.failure_callback))
                 set_callbacks(callback_list=callback_list)
             if add_breadcrumb:
                 add_breadcrumb(
                     category="litellm.llm_call",
                     message=f"Positional Args: {args}, Keyword Args: {kwargs}",
                     level="info",
                 )
         except Exception: # DO NOT BLOCK running the function because of this
             print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
     @wraps(original_function)
     def wrapper(*args, **kwargs):
         try:
             # unpack so the breadcrumb shows the real positional / keyword arguments
             function_setup(*args, **kwargs)
             ## MODEL CALL
             result = original_function(*args, **kwargs)
             ## LOG SUCCESS
             my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
             my_thread.start()
             return result
         except Exception as e:
             traceback_exception = traceback.format_exc()
             my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
             my_thread.start()
             raise
     return wrapper
 ####### HELPER FUNCTIONS ################
 def set_callbacks(callback_list):
     """Initialise the third-party integrations named in `callback_list`.
     Recognised names are "sentry", "posthog" and "slack"; any other name is ignored.
     A missing package is installed with pip and then imported. The resulting clients
     are stored in this module's globals. Setup is best-effort: a failure is reported
     through print_verbose() and never raised to the caller.
         :param callback_list: iterable of integration names to set up.
     """
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
     import sys # needed for sys.executable below; this module's top-level imports do not bring it in
     try:
         for callback in callback_list:
             if callback == "sentry":
                 try:
                     import sentry_sdk
                 except ImportError:
                     print_verbose("Package 'sentry_sdk' is missing. Installing it...")
                     subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
                     import sentry_sdk
                 sentry_sdk_instance = sentry_sdk
                 sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
                 capture_exception = sentry_sdk_instance.capture_exception
                 add_breadcrumb = sentry_sdk_instance.add_breadcrumb
             elif callback == "posthog":
                 try:
                     from posthog import Posthog
                 except ImportError:
                     print_verbose("Package 'posthog' is missing. Installing it...")
                     subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
                     from posthog import Posthog
                 posthog = Posthog(
                     project_api_key=os.environ.get("POSTHOG_API_KEY"),
                     host=os.environ.get("POSTHOG_API_URL"))
             elif callback == "slack":
                 try:
                     from slack_bolt import App
                 except ImportError:
                     print_verbose("Package 'slack_bolt' is missing. Installing it...")
                     subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
                     from slack_bolt import App
                 slack_app = App(
                     token=os.environ.get("SLACK_API_TOKEN"),
                     signing_secret=os.environ.get("SLACK_API_SECRET")
                 )
                 alerts_channel = os.environ["SLACK_API_CHANNEL"]
                 print_verbose(f"Initialized Slack App: {slack_app}")
     except Exception:
         # stay non-blocking, but leave a trace of why an integration was not set up
         print_verbose(f"[Non-Blocking] Error setting up callbacks: {traceback.format_exc()}")
 def handle_failure(exception, traceback_exception, args, kwargs):
     """Report a failed call to every integration listed in `litellm.failure_callback`.
     Runs on a helper thread started by the `client` wrapper, so nothing raised here
     reaches the caller; problems are reported through print_verbose() instead.
         :param exception: the exception raised by the wrapped call.
         :param traceback_exception: formatted traceback text for that exception.
         :param args: positional arguments of the failed call.
         :param kwargs: keyword arguments of the failed call (not modified here).
     """
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
     try:
         print_verbose(f"handle_failure args: {args}")
         print_verbose(f"handle_failure kwargs: {kwargs}")
         # handlers are taken out of additional_details so they are not sent along with the report
         additional_details.pop("success_handler", None)
         failure_handler = additional_details.pop("failure_handler", None)
         additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
         print_verbose(f"self.failure_callback: {litellm.failure_callback}")
         print_verbose(f"additional_details: {additional_details}")
         for callback in litellm.failure_callback:
             try:
                 if callback == "slack":
                     slack_lines = [f"{key}: {kwargs[key]}\n" for key in kwargs]
                     slack_lines += [f"LiteLLM_Args_{str(i)}: {arg}\n" for i, arg in enumerate(args)]
                     slack_lines += [f"{detail}: {additional_details[detail]}\n" for detail in additional_details]
                     slack_lines.append(f"Traceback: {traceback_exception}")
                     slack_app.client.chat_postMessage(channel=alerts_channel, text="".join(slack_lines))
                 elif callback == "sentry":
                     capture_exception(exception)
                 elif callback == "posthog":
                     print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
                     ph_obj = dict(kwargs) # copy, so the caller's kwargs are left untouched
                     for i, arg in enumerate(args):
                         ph_obj["litellm_args_" + str(i)] = arg
                     ph_obj.update(additional_details)
                     event_name = additional_details["Event_Name"]
                     print_verbose(f"ph_obj: {ph_obj}")
                     print_verbose(f"PostHog Event Name: {event_name}")
                     if "user_id" in additional_details:
                         distinct_id = additional_details["user_id"]
                     else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                         distinct_id = str(uuid.uuid4())
                     # send the event properties in both cases
                     posthog.capture(distinct_id, event_name, ph_obj)
                     print_verbose("successfully logged to PostHog!")
             except Exception:
                 print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
         if failure_handler and callable(failure_handler):
             call_details = {
                 "exception": exception,
                 "additional_details": additional_details
             }
             failure_handler(call_details)
     except Exception:
         # stay non-blocking, but do not hide why reporting failed
         print_verbose(f"[Non-Blocking] Error Occurred while handling failure: {traceback.format_exc()}")
 def handle_success(*args, **kwargs):
     """Report a successful call to each integration named in ``litellm.success_callback``.

     Recognised callback names are ``"posthog"`` and ``"slack"``; any other
     name is ignored. After the callbacks have run, the ``success_handler``
     popped from ``additional_details`` is invoked (if it is callable) as
     ``success_handler(args, kwargs)``.

     Reporting is best-effort: errors are logged through ``print_verbose``
     and never propagate to the caller.

     Args:
         *args: Positional arguments forwarded to ``success_handler`` as a tuple.
         **kwargs: Keyword arguments forwarded to ``success_handler`` as a dict.
     """
     try:
         # NOTE(review): `additional_details` is not defined in this function;
         # presumably it is module-level state shared with handle_failure - confirm.
         # Both handlers are popped so they are not sent along with the details.
         success_handler = additional_details.pop("success_handler", None)
         additional_details.pop("failure_handler", None)
         # The default event name keeps its existing spelling: changing it
         # would rename the event that is emitted.
         additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
         for callback in litellm.success_callback:
             try:
                 if callback == "posthog":
                     ph_obj = dict(additional_details)
                     event_name = additional_details["Event_Name"]
                     if "user_id" in additional_details:
                         distinct_id = additional_details["user_id"]
                     else:
                         # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                         distinct_id = str(uuid.uuid4())
                     posthog.capture(distinct_id, event_name, ph_obj)
                 elif callback == "slack":
                     slack_msg = "".join(
                         f"{detail}: {value}\n" for detail, value in additional_details.items()
                     )
                     slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
             except Exception:
                 # One failing integration must not stop the others.
                 print_verbose(f"Error Occurred while logging success: {traceback.format_exc()}")
         if success_handler and callable(success_handler):
             success_handler(args, kwargs)
     except Exception:
         # Success reporting must never break the call that just succeeded.
         print_verbose(f"Error Occurred while handling success: {traceback.format_exc()}")
 def _map_provider_exception(model, original_exception):
     """Return the OpenAI-style exception matching a provider error, or None if nothing matches."""
     error_str = str(original_exception)
     if isinstance(original_exception, BaseException):
         error_class_name = type(original_exception).__name__
     else:
         error_class_name = ""
     # Not every provider error exposes `.message`; fall back to its string form.
     message = getattr(original_exception, "message", error_str)
     if "claude" in model:  # one of the anthropics
         # The status code is an attribute of the error object, so it has to
         # be looked up with getattr rather than a membership test.
         status_code = getattr(original_exception, "status_code", None)
         if status_code is not None:
             print_verbose(f"status_code: {status_code}")
         if status_code == 401:
             return AuthenticationError(f"AnthropicException - {message}")
         if status_code == 400:
             return InvalidRequestError(f"AnthropicException - {message}", f"{model}")
         if status_code == 429:
             return RateLimitError(f"AnthropicException - {message}")
     elif "replicate" in model:
         if "Incorrect authentication token" in error_str:
             return AuthenticationError(f"ReplicateException - {error_str}")
         if error_class_name == "ModelError":
             return InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
         if "Request was throttled" in error_str:
             return RateLimitError(f"ReplicateException - {error_str}")
         if error_class_name == "ReplicateError":  # ReplicateError implies an error on Replicate server side, not user side
             return ServiceUnavailableError(f"ReplicateException - {error_str}")
     elif model == "command-nightly":  # Cohere
         if "invalid api token" in error_str or "No API key provided." in error_str:
             return AuthenticationError(f"CohereException - {error_str}")
         if "too many tokens" in error_str:
             return InvalidRequestError(f"CohereException - {error_str}", f"{model}")
         if "CohereConnectionError" in error_class_name:  # cohere seems to fire these errors when we load test it (1k+ messages / min)
             return RateLimitError(f"CohereException - {message}")
     return None


 def exception_type(model, original_exception):
     """Raise the OpenAI-style exception that corresponds to a provider error.

     ``OpenAIError`` instances are re-raised unchanged. For Anthropic
     ("claude" in the model name), Replicate ("replicate" in the model name)
     and Cohere ("command-nightly") errors, a matching
     ``AuthenticationError``, ``InvalidRequestError``, ``RateLimitError`` or
     ``ServiceUnavailableError`` is raised. When nothing matches, or the
     mapping itself fails, ``original_exception`` is raised instead.

     This function never returns normally.

     Args:
         model: Name of the model the failed call targeted; may be empty.
         original_exception: The error raised by the provider call.

     Raises:
         AuthenticationError, InvalidRequestError, RateLimitError,
         ServiceUnavailableError: When the provider error could be mapped.
         Exception: ``original_exception`` itself in every other case.
     """
     mapped_exception = None
     try:
         if model and not isinstance(original_exception, OpenAIError):
             mapped_exception = _map_provider_exception(model, original_exception)
     except Exception:
         # A problem inside the mapping must not hide the provider's error.
         mapped_exception = None
     # The raise happens outside the try block so that a successfully mapped
     # error is not caught and replaced by the original one.
     if mapped_exception is not None:
         raise mapped_exception
     raise original_exception
 def safe_crash_reporting(model=None, exception=None, azure=None):
     """Send a crash report through ``litellm_telemetry`` on a daemon thread.

     Args:
         model: Model name to include in the report.
         exception: The error to report; it is sent in its ``str()`` form.
         azure: Value stored under the ``"azure"`` key of the report.
     """
     report = dict(model=model, exception=str(exception), azure=azure)
     # Daemon thread: the caller does not wait for the report to be sent.
     reporter = threading.Thread(
         target=litellm_telemetry,
         args=(report,),
         daemon=True,
     )
     reporter.start()
 def litellm_telemetry(data):
     """POST ``data`` to the LiteLLM logging endpoint, tagged with a persistent UUID.

     The UUID is read from ``litellm_uuid.txt`` in the current working
     directory. If that file is missing, unreadable, empty or contains only
     whitespace, a new UUID is generated and written back when possible.
     The whole operation is best-effort: file and network errors are ignored.

     Args:
         data: JSON-serialisable payload stored under the ``"data"`` key.
     """
     # Load or generate the UUID
     uuid_file = 'litellm_uuid.txt'
     uuid_value = ""
     try:
         with open(uuid_file, 'r') as file:
             # Strip before testing so a whitespace-only file counts as empty.
             uuid_value = file.read().strip()
     except OSError:
         # Missing or unreadable file: a fresh UUID is generated below.
         pass
     if not uuid_value:
         uuid_value = str(uuid.uuid4())
         try:
             with open(uuid_file, 'w') as file:
                 file.write(uuid_value)
         except OSError:
             # E.g. a read-only working directory; the report is still sent,
             # the UUID is just not persisted.
             pass
     # Prepare the data to send to the logging endpoint
     payload = {
         'uuid': uuid_value,
         'data': data
     }
     print_verbose(f"payload: {payload}")
     try:
         # The timeout keeps an unresponsive endpoint from blocking this
         # thread indefinitely.
         response = requests.post('https://litellm.berri.ai/logging', json=payload, timeout=10)
         response.raise_for_status()  # Raise an exception for HTTP errors
     except requests.exceptions.RequestException:
         # Telemetry must never surface errors to the user.
         pass
--- a/dist/litellm-0.1.2-py3-none-any.whl
+++ b/dist/litellm-0.1.2-py3-none-any.whl
--- a/dist/litellm-0.1.2.tar.gz
+++ b/dist/litellm-0.1.2.tar.gz
--- a/dist/litellm-0.1.216-py3-none-any.whl
+++ b/dist/litellm-0.1.216-py3-none-any.whl
--- a/dist/litellm-0.1.216.tar.gz
+++ b/dist/litellm-0.1.216.tar.gz
--- a/litellm.egg-info/PKG-INFO
+++ b/litellm.egg-info/PKG-INFO
@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: litellm
-Version: 0.1.207
+Version: 0.1.216
 Summary: Library to easily interface with LLM API providers
 Author: BerriAI
 License-File: LICENSE
--- a/litellm.egg-info/SOURCES.txt
+++ b/litellm.egg-info/SOURCES.txt
@ -1,5 +1,6 @@
 LICENSE
 README.md
 pyproject.toml
 setup.py
 litellm/__init__.py
 litellm/main.py
--- a/litellm/pycache/init.cpython-311.pyc
+++ b/litellm/pycache/init.cpython-311.pyc
--- a/litellm/pycache/main.cpython-311.pyc
+++ b/litellm/pycache/main.cpython-311.pyc
--- a/litellm/pycache/utils.cpython-311.pyc
+++ b/litellm/pycache/utils.cpython-311.pyc
--- a/litellm/tests/test_client.py
+++ b/litellm/tests/test_client.py
@ -57,3 +57,4 @@ def test_good_azure_embedding():
        print(f"response: {str(response)[:50]}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
--- a/litellm/tests/test_no_client.py
+++ b/litellm/tests/test_no_client.py
@ -0,0 +1,23 @@
 #### What this tests ####
 #    This tests error logging (with custom user functions) for the `completion` + `embedding` endpoints without callbacks (i.e. slack, posthog, etc. not set)
 #    Requirements: Remove any env keys you have related to slack/posthog/etc. + anthropic api key (cause an exception)
 import sys, os
 import traceback
 sys.path.insert(0, os.path.abspath('../..'))  # Puts the directory two levels above the current working directory first on the import path
 import litellm
 from litellm import embedding, completion
 # Turn on litellm's verbose flag for this run.
 litellm.set_verbose = True
 # Models passed to `completion`, one per loop iteration.
 model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
 for model in model_fallback_list:
    try:
        # The embedding call is the same on every iteration; only the
        # completion call varies with `model`.
        response = embedding(model="text-embedding-ada-002", input=[user_message])
        response = completion(model=model, messages=messages)
    except Exception as e:
        # Print the traceback instead of re-raising so the loop continues
        # with the remaining models.
        print(f"error occurred: {traceback.format_exc()}") 
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -234,6 +234,7 @@ def handle_success(*args, **kwargs):
 def exception_type(model, original_exception):
    try:
      if isinstance(original_exception, OpenAIError):
          # Handle the OpenAIError
          raise original_exception
@ -244,6 +245,7 @@ def exception_type(model, original_exception):
        else:
          exception_type = ""
        if "claude" in model: #one of the anthropics
          if "status_code" in original_exception:
            print_verbose(f"status_code: {original_exception.status_code}")
            if original_exception.status_code == 401:
              raise AuthenticationError(f"AnthropicException - {original_exception.message}")
@ -270,6 +272,8 @@ def exception_type(model, original_exception):
        raise original_exception # base case - return the original exception
      else:
        raise original_exception
    except:
      raise original_exception
 def safe_crash_reporting(model=None, exception=None, azure=None):
    data = {
@ -277,11 +281,9 @@ def safe_crash_reporting(model=None, exception=None, azure=None):
      "exception": str(exception),
      "azure": azure
    }
    print(f"data in crash reporting: {data}")
    threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
 def litellm_telemetry(data):
    print(f"data in in litellm telemetry: {data}")
    # Load or generate the UUID
    uuid_file = 'litellm_uuid.txt'
    try:
@ -290,7 +292,6 @@ def litellm_telemetry(data):
            uuid_value = file.read()
            if uuid_value:
                uuid_value = uuid_value.strip()
                print(f"Loaded UUID: {uuid_value}")
            else:
                raise FileNotFoundError
    except FileNotFoundError:
@ -299,7 +300,6 @@ def litellm_telemetry(data):
        uuid_value = str(new_uuid)
        with open(uuid_file, 'w') as file:
            file.write(uuid_value)
        print(f"Generated and stored UUID: {uuid_value}")
    # Prepare the data to send to localhost:3000
    payload = {
@ -311,7 +311,6 @@ def litellm_telemetry(data):
      # Make the POST request to localhost:3000
      response = requests.post('https://litellm.berri.ai/logging', json=payload)
      response.raise_for_status()  # Raise an exception for HTTP errors
      print('Request successfully sent!')
    except requests.exceptions.RequestException as e:
        # Handle any errors in the request
-        print(f'Error: {e}')
+        pass
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
    name='litellm',
-    version='0.1.214',
+    version='0.1.216',
    description='Library to easily interface with LLM API providers',
    author='BerriAI',
    packages=[