diff --git a/build/lib/litellm/__init__.py b/build/lib/litellm/__init__.py
deleted file mode 100644
index 191a8ed26..000000000
--- a/build/lib/litellm/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-success_callback = []
-failure_callback = []
-set_verbose=False
-telemetry=True
-max_tokens = 256 # OpenAI Defaults
-retry = True # control tenacity retries.
-####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
-api_base = None
-headers = None
-####### COMPLETION MODELS ###################
-open_ai_chat_completion_models = [
-    "gpt-4",
-    "gpt-4-0613",
-    "gpt-4-32k",
-    "gpt-4-32k-0613",
-    #################
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-16k",
-    "gpt-3.5-turbo-0613",
-    "gpt-3.5-turbo-16k-0613",
-    'gpt-3.5-turbo',
-    'gpt-3.5-turbo-16k-0613',
-    'gpt-3.5-turbo-16k'
-]
-open_ai_text_completion_models = [
-    'text-davinci-003'
-]
-
-cohere_models = [
-    'command-nightly',
-]
-
-anthropic_models = [
-    "claude-2",
-    "claude-instant-1"
-]
-
-model_list = open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models
-
-####### EMBEDDING MODELS ###################
-open_ai_embedding_models = [
-    'text-embedding-ada-002'
-]
-from .timeout import timeout
-from .utils import client, logging, exception_type # Import all the symbols from main.py
-from .main import * # Import all the symbols from main.py
-from .integrations import *
\ No newline at end of file
diff --git a/build/lib/litellm/integrations/__init__.py b/build/lib/litellm/integrations/__init__.py
deleted file mode 100644
index b9742821a..000000000
--- a/build/lib/litellm/integrations/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import *
\ No newline at end of file
diff --git a/build/lib/litellm/integrations/helicone.py b/build/lib/litellm/integrations/helicone.py
deleted file mode 100644
index 6b3d61965..000000000
--- a/build/lib/litellm/integrations/helicone.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#### What this does ####
-# On success, logs events to Helicone
-import dotenv, os
-import requests
-from anthropic import HUMAN_PROMPT, AI_PROMPT
-dotenv.load_dotenv() # Loading env variables using dotenv
-import traceback
-class HeliconeLogger:
-    # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
-    def __init__(self):
-        # Instance variables
-        self.provider_url = "https://api.openai.com/v1"
-        self.key = os.getenv('HELICONE_API_KEY')
-
-    def claude_mapping(self, model, messages, response_obj):
-        prompt = f"{HUMAN_PROMPT}"
-        for message in messages:
-            if "role" in message:
-                if message["role"] == "user":
-                    prompt += f"{HUMAN_PROMPT}{message['content']}"
-                else:
-                    prompt += f"{AI_PROMPT}{message['content']}"
-            else:
-                prompt += f"{HUMAN_PROMPT}{message['content']}"
-        prompt += f"{AI_PROMPT}"
-        claude_provider_request = {"model": model, "prompt": prompt}
-
-        claude_response_obj = {"completion": response_obj['choices'][0]['message']['content'], "model": model, "stop_reason": "stop_sequence"}
-
-        return claude_provider_request, claude_response_obj
-
-    def log_success(self, model, messages, response_obj, start_time, end_time, print_verbose):
-        # Method definition
-        try:
-            print_verbose(f"Helicone Logging - Enters logging function for model {model}")
-            model = model if any(accepted_model in model for accepted_model in self.helicone_model_list) else "gpt-3.5-turbo"
-            provider_request = {"model": model, "messages": messages}
-
-            if "claude" in model:
-                provider_request, response_obj = self.claude_mapping(model=model, messages=messages, response_obj=response_obj)
-
-            providerResponse = {
-                "json": response_obj,
-                "headers": {"openai-version": "2020-10-01"},
-                "status": 200
-            }
-
-            # Code to be executed
-            url = "https://api.hconeai.com/oai/v1/log"
-            headers = {
-                'Authorization': f'Bearer {self.key}',
-                'Content-Type': 'application/json'
-            }
-            start_time_seconds = int(start_time.timestamp())
-            start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
-            end_time_seconds = int(end_time.timestamp())
-            end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)
-            data = {
-                "providerRequest": {"url": self.provider_url, "json": provider_request, "meta": {"Helicone-Auth": f"Bearer {self.key}"}},
-                "providerResponse": providerResponse,
-                "timing": {"startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds}, "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds}} # {"seconds": .., "milliseconds": ..}
-            }
-            response = requests.post(url, headers=headers, json=data)
-            if response.status_code == 200:
-                print_verbose("Helicone Logging - Success!")
-            else:
-                print_verbose(f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}")
-                print_verbose(f"Helicone Logging - Error {response.text}")
-        except:
-            # traceback.print_exc()
-            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
-            pass
\ No newline at end of file
diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py
deleted file mode 100644
index f35af8013..000000000
--- a/build/lib/litellm/main.py
+++ /dev/null
@@ -1,315 +0,0 @@
-import os, openai, cohere, replicate, sys
-from typing import Any
-from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-import traceback
-from functools import partial
-import dotenv
-import traceback
-import litellm
-from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
-import random
-import asyncio
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-) # for exponential backoff
-####### ENVIRONMENT VARIABLES ###################
-dotenv.load_dotenv() # Loading env variables using dotenv
-
-def get_optional_params(
-    # 12 optional params
-    functions = [],
-    function_call = "",
-    temperature = 1,
-    top_p = 1,
-    n = 1,
-    stream = False,
-    stop = None,
-    max_tokens = float('inf'),
-    presence_penalty = 0,
-    frequency_penalty = 0,
-    logit_bias = {},
-    user = "",
-    deployment_id = None
-):
-    optional_params = {}
-    if functions != []:
-        optional_params["functions"] = functions
-    if function_call != "":
-        optional_params["function_call"] = function_call
-    if temperature != 1:
-        optional_params["temperature"] = temperature
-    if top_p != 1:
-        optional_params["top_p"] = top_p
-    if n != 1:
-        optional_params["n"] = n
-    if stream:
-        optional_params["stream"] = stream
-    if stop != None:
-        optional_params["stop"] = stop
-    if max_tokens != float('inf'):
-        optional_params["max_tokens"] = max_tokens
-    if presence_penalty != 0:
-        optional_params["presence_penalty"] = presence_penalty
-    if frequency_penalty != 0:
-        optional_params["frequency_penalty"] = frequency_penalty
-    if logit_bias != {}:
-        optional_params["logit_bias"] = logit_bias
-    if user != "":
-        optional_params["user"] = user
-    if deployment_id != None:
-        optional_params["deployment_id"] = user
-    return optional_params
-
-####### COMPLETION ENDPOINTS ################
-#############################################
-async def acompletion(*args, **kwargs):
-    loop = asyncio.get_event_loop()
-
-    # Use a partial function to pass your keyword arguments
-    func = partial(completion, *args, **kwargs)
-
-    # Call the synchronous function using run_in_executor
-    return await loop.run_in_executor(None, func)
-
-@client
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
-def completion(
-    model, messages, # required params
-    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
-    functions=[], function_call="", # optional params
-    temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
-    presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
-    # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False
-    ):
-    try:
-        # check if user passed in any of the OpenAI optional params
-        optional_params = get_optional_params(
-            functions=functions, function_call=function_call,
-            temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
-            presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id
-        )
-        if azure == True:
-            # azure configs
-            openai.api_type = "azure"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else os.environ.get("AZURE_API_BASE")
-            openai.api_version = os.environ.get("AZURE_API_VERSION")
-            openai.api_key = api_key if api_key is not None else os.environ.get("AZURE_API_KEY")
-            ## LOGGING
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.ChatCompletion.create(
-                    engine=model,
-                    messages = messages,
-                    headers = litellm.headers,
-                    **optional_params,
-                )
-            else:
-                response = openai.ChatCompletion.create(
-                    engine=model,
-                    messages = messages,
-                    **optional_params
-                )
-        elif model in litellm.open_ai_chat_completion_models:
-            openai.api_type = "openai"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = api_key if api_key is not None else os.environ.get("OPENAI_API_KEY")
-            ## LOGGING
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.ChatCompletion.create(
-                    model=model,
-                    messages = messages,
-                    headers = litellm.headers,
-                    **optional_params
-                )
-            else:
-                response = openai.ChatCompletion.create(
-                    model=model,
-                    messages = messages,
-                    **optional_params
-                )
-        elif model in litellm.open_ai_text_completion_models:
-            openai.api_type = "openai"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = api_key if api_key is not None else os.environ.get("OPENAI_API_KEY")
-            prompt = " ".join([message["content"] for message in messages])
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.Completion.create(
-                    model=model,
-                    prompt = prompt,
-                    headers = litellm.headers,
-                )
-            else:
-                response = openai.Completion.create(
-                    model=model,
-                    prompt = prompt
-                )
-        elif "replicate" in model:
-            # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
-            # checking in case user set it to REPLICATE_API_KEY instead
-            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
-                replicate_api_token = os.environ.get("REPLICATE_API_KEY")
-                os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
-            elif api_key:
-                os.environ["REPLICATE_API_TOKEN"] = api_key
-            prompt = " ".join([message["content"] for message in messages])
-            input = {"prompt": prompt}
-            if max_tokens != float('inf'):
-                input["max_length"] = max_tokens # for t5 models
-                input["max_new_tokens"] = max_tokens # for llama2 models
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            output = replicate.run(
-                model,
-                input=input)
-            response = ""
-            for item in output:
-                response += item
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            response = new_response
-        elif model in litellm.anthropic_models:
-            #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
-            if api_key:
-                os.environ["ANTHROPIC_API_KEY"] = api_key
-            prompt = f"{HUMAN_PROMPT}"
-            for message in messages:
-                if "role" in message:
-                    if message["role"] == "user":
-                        prompt += f"{HUMAN_PROMPT}{message['content']}"
-                    else:
-                        prompt += f"{AI_PROMPT}{message['content']}"
-                else:
-                    prompt += f"{HUMAN_PROMPT}{message['content']}"
-            prompt += f"{AI_PROMPT}"
-            anthropic = Anthropic()
-            # check if user passed in max_tokens != float('inf')
-            if max_tokens != float('inf'):
-                max_tokens_to_sample = max_tokens
-            else:
-                max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            completion = anthropic.completions.create(
-                model=model,
-                prompt=prompt,
-                max_tokens_to_sample=max_tokens_to_sample
-            )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": completion.completion,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            print_verbose(f"new response: {new_response}")
-            response = new_response
-        elif model in litellm.cohere_models:
-            cohere_key = api_key if api_key is not None else os.environ.get("COHERE_API_KEY")
-            co = cohere.Client(cohere_key)
-            prompt = " ".join([message["content"] for message in messages])
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            response = co.generate(
-                model=model,
-                prompt = prompt
-            )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response[0].text,
-                            "role": "assistant"
-                        }
-                    }
-                ],
-            }
-            response = new_response
-        else:
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            args = locals()
-            raise ValueError(f"No valid completion model args passed in - {args}")
-        return response
-    except Exception as e:
-        # log the original exception
-        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
-        ## Map to OpenAI Exception
-        raise exception_type(model=model, original_exception=e)
-
-### EMBEDDING ENDPOINTS ####################
-@client
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
-def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
-    try:
-        response = None
-        if azure == True:
-            # azure configs
-            openai.api_type = "azure"
-            openai.api_base = os.environ.get("AZURE_API_BASE")
-            openai.api_version = os.environ.get("AZURE_API_VERSION")
-            openai.api_key = os.environ.get("AZURE_API_KEY")
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            ## EMBEDDING CALL
-            response = openai.Embedding.create(input=input, engine=model)
-            print_verbose(f"response_value: {str(response)[:50]}")
-        elif model in litellm.open_ai_embedding_models:
-            openai.api_type = "openai"
-            openai.api_base = "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = os.environ.get("OPENAI_API_KEY")
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            ## EMBEDDING CALL
-            response = openai.Embedding.create(input=input, model=model)
-            print_verbose(f"response_value: {str(response)[:50]}")
-        else:
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            args = locals()
-            raise ValueError(f"No valid embedding model args passed in - {args}")
-
-        return response
-    except Exception as e:
-        # log the original exception
-        logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
-        ## Map to OpenAI Exception
-        raise exception_type(model=model, original_exception=e)
-####### HELPER FUNCTIONS ################
-## Set verbose to true -> ```litellm.set_verbose = True```
-def print_verbose(print_statement):
-    if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
-        if random.random() <= 0.3:
-            print("Get help - https://discord.com/invite/wuPM9dRgDw")
-
diff --git a/build/lib/litellm/timeout.py b/build/lib/litellm/timeout.py
deleted file mode 100644
index 37bbbffc1..000000000
--- a/build/lib/litellm/timeout.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Module containing "timeout" decorator for sync and async callables.
-"""
-
-import asyncio
-
-from concurrent import futures
-from inspect import iscoroutinefunction
-from functools import wraps
-from threading import Thread
-from openai.error import Timeout
-
-
-def timeout(
-    timeout_duration: float = None, exception_to_raise = Timeout
-):
-    """
-    Wraps a function to raise the specified exception if execution time
-    is greater than the specified timeout.
-
-    Works with both synchronous and asynchronous callables, but with synchronous ones will introduce
-    some overhead due to the backend use of threads and asyncio.
-
-    :param float timeout_duration: Timeout duration in seconds. If none callable won't time out.
-    :param OpenAIError exception_to_raise: Exception to raise when the callable times out.
-        Defaults to TimeoutError.
-    :return: The decorated function.
-    :rtype: callable
-    """
-
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            async def async_func():
-                return func(*args, **kwargs)
-
-            thread = _LoopWrapper()
-            thread.start()
-            future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
-            local_timeout_duration = timeout_duration
-            if "force_timeout" in kwargs:
-                local_timeout_duration = kwargs["force_timeout"]
-            try:
-                result = future.result(timeout=local_timeout_duration)
-            except futures.TimeoutError:
-                thread.stop_loop()
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
-            thread.stop_loop()
-            return result
-
-        @wraps(func)
-        async def async_wrapper(*args, **kwargs):
-            local_timeout_duration = timeout_duration
-            if "force_timeout" in kwargs:
-                local_timeout_duration = kwargs["force_timeout"]
-            try:
-                value = await asyncio.wait_for(
-                    func(*args, **kwargs), timeout=timeout_duration
-                )
-                return value
-            except asyncio.TimeoutError:
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
-
-        if iscoroutinefunction(func):
-            return async_wrapper
-        return wrapper
-
-    return decorator
-
-
-class _LoopWrapper(Thread):
-    def __init__(self):
-        super().__init__(daemon=True)
-        self.loop = asyncio.new_event_loop()
-
-    def run(self) -> None:
-        self.loop.run_forever()
-        self.loop.call_soon_threadsafe(self.loop.close)
-
-    def stop_loop(self):
-        for task in asyncio.all_tasks(self.loop):
-            task.cancel()
-        self.loop.call_soon_threadsafe(self.loop.stop)
\ No newline at end of file
diff --git a/build/lib/litellm/utils.py b/build/lib/litellm/utils.py
deleted file mode 100644
index b0050226b..000000000
--- a/build/lib/litellm/utils.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import dotenv, json, traceback, threading
-import subprocess, os
-import litellm, openai
-import random, uuid, requests
-import datetime
-from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
-####### ENVIRONMENT VARIABLES ###################
-dotenv.load_dotenv() # Loading env variables using dotenv
-sentry_sdk_instance = None
-capture_exception = None
-add_breadcrumb = None
-posthog = None
-slack_app = None
-alerts_channel = None
-heliconeLogger = None
-callback_list = []
-user_logger_fn = None
-additional_details = {}
-
-def print_verbose(print_statement):
-    if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
-        if random.random() <= 0.3:
-            print("Get help - https://discord.com/invite/wuPM9dRgDw")
-
-####### LOGGING ###################
-#Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
-    try:
-        model_call_details = {}
-        model_call_details["model"] = model
-        model_call_details["azure"] = azure
-        # log exception details
-        if exception:
-            model_call_details["original_exception"] = exception
-
-        if litellm.telemetry:
-            safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
-
-        model_call_details["input"] = input
-        # log additional call details -> api key, etc.
-        if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
-            model_call_details["api_type"] = openai.api_type
-            model_call_details["api_base"] = openai.api_base
-            model_call_details["api_version"] = openai.api_version
-            model_call_details["api_key"] = openai.api_key
-        elif "replicate" in model:
-            model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
-        elif model in litellm.anthropic_models:
-            model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
-        elif model in litellm.cohere_models:
-            model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-        model_call_details["additional_args"] = additional_args
-        ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
-        print_verbose(f"Basic model call details: {model_call_details}")
-        if logger_fn and callable(logger_fn):
-            try:
-                logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
-            except:
-                print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
-    except:
-        traceback.print_exc()
-        pass
-
-####### CLIENT ###################
-# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
-def client(original_function):
-    def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
-        try:
-            global callback_list, add_breadcrumb
-            if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
-                callback_list = list(set(litellm.success_callback + litellm.failure_callback))
-                set_callbacks(callback_list=callback_list,)
-            if add_breadcrumb:
-                add_breadcrumb(
-                    category="litellm.llm_call",
-                    message=f"Positional Args: {args}, Keyword Args: {kwargs}",
-                    level="info",
-                )
-        except: # DO NOT BLOCK running the function because of this
-            print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
-            pass
-
-    def wrapper(*args, **kwargs):
-        try:
-            function_setup(args, kwargs)
-            ## MODEL CALL
-            start_time = datetime.datetime.now()
-            result = original_function(*args, **kwargs)
-            end_time = datetime.datetime.now()
-            ## LOG SUCCESS
-            my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
-            my_thread.start()
-            return result
-        except Exception as e:
-            traceback_exception = traceback.format_exc()
-            my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
-            my_thread.start()
-            raise e
-    return wrapper
-
-####### HELPER FUNCTIONS ################
-def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
-    try:
-        for callback in callback_list:
-            if callback == "sentry":
-                try:
-                    import sentry_sdk
-                except ImportError:
-                    print_verbose("Package 'sentry_sdk' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
-                    import sentry_sdk
-                sentry_sdk_instance = sentry_sdk
-                sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
-                sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
-                capture_exception = sentry_sdk_instance.capture_exception
-                add_breadcrumb = sentry_sdk_instance.add_breadcrumb
-            elif callback == "posthog":
-                try:
-                    from posthog import Posthog
-                except ImportError:
-                    print_verbose("Package 'posthog' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
-                    from posthog import Posthog
-                posthog = Posthog(
-                    project_api_key=os.environ.get("POSTHOG_API_KEY"),
-                    host=os.environ.get("POSTHOG_API_URL"))
-            elif callback == "slack":
-                try:
-                    from slack_bolt import App
-                except ImportError:
-                    print_verbose("Package 'slack_bolt' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
-                    from slack_bolt import App
-                slack_app = App(
-                    token=os.environ.get("SLACK_API_TOKEN"),
-                    signing_secret=os.environ.get("SLACK_API_SECRET")
-                )
-                alerts_channel = os.environ["SLACK_API_CHANNEL"]
-                print_verbose(f"Initialized Slack App: {slack_app}")
-            elif callback == "helicone":
-                from .integrations.helicone import HeliconeLogger
-
-                heliconeLogger = HeliconeLogger()
-    except:
-        pass
-
-
-def handle_failure(exception, traceback_exception, args, kwargs):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
-    try:
-        print_verbose(f"handle_failure args: {args}")
-        print_verbose(f"handle_failure kwargs: {kwargs}")
-
-        success_handler = additional_details.pop("success_handler", None)
-        failure_handler = additional_details.pop("failure_handler", None)
-
-        additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
-        print_verbose(f"self.failure_callback: {litellm.failure_callback}")
-
-        print_verbose(f"additional_details: {additional_details}")
-        for callback in litellm.failure_callback:
-            try:
-                if callback == "slack":
-                    slack_msg = ""
-                    if len(kwargs) > 0:
-                        for key in kwargs:
-                            slack_msg += f"{key}: {kwargs[key]}\n"
-                    if len(args) > 0:
-                        for i, arg in enumerate(args):
-                            slack_msg += f"LiteLLM_Args_{str(i)}: {arg}"
-                    for detail in additional_details:
-                        slack_msg += f"{detail}: {additional_details[detail]}\n"
-                    slack_msg += f"Traceback: {traceback_exception}"
-                    slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
-                elif callback == "sentry":
-                    capture_exception(exception)
-                elif callback == "posthog":
-                    print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
-                    ph_obj = {}
-                    if len(kwargs) > 0:
-                        ph_obj = kwargs
-                    if len(args) > 0:
-                        for i, arg in enumerate(args):
-                            ph_obj["litellm_args_" + str(i)] = arg
-                    for detail in additional_details:
-                        ph_obj[detail] = additional_details[detail]
-                    event_name = additional_details["Event_Name"]
-                    print_verbose(f"ph_obj: {ph_obj}")
-                    print_verbose(f"PostHog Event Name: {event_name}")
-                    if "user_id" in additional_details:
-                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
-                    else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
-                        unique_id = str(uuid.uuid4())
-                        posthog.capture(unique_id, event_name)
-                    print_verbose(f"successfully logged to PostHog!")
-            except:
-                print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
-                pass
-
-        if failure_handler and callable(failure_handler):
-            call_details = {
-                "exception": exception,
-                "additional_details": additional_details
-            }
-            failure_handler(call_details)
-        pass
-    except:
-        pass
-
-def handle_success(args, kwargs, result, start_time, end_time):
-    global heliconeLogger
-    try:
-        success_handler = additional_details.pop("success_handler", None)
-        failure_handler = additional_details.pop("failure_handler", None)
-        additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
-        for callback in litellm.success_callback:
-            try:
-                if callback == "posthog":
-                    ph_obj = {}
-                    for detail in additional_details:
-                        ph_obj[detail] = additional_details[detail]
-                    event_name = additional_details["Event_Name"]
-                    if "user_id" in additional_details:
-                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
-                    else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
-                        unique_id = str(uuid.uuid4())
-                        posthog.capture(unique_id, event_name, ph_obj)
-                    pass
-                elif callback == "slack":
-                    slack_msg = ""
-                    for detail in additional_details:
-                        slack_msg += f"{detail}: {additional_details[detail]}\n"
-                    slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
-                elif callback == "helicone":
-                    print_verbose("reaches helicone for logging!")
-                    model = args[0] if len(args) > 0 else kwargs["model"]
-                    messages = args[1] if len(args) > 1 else kwargs["messages"]
-                    heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
-            except:
-                print_verbose(f"Success Callback Error - {traceback.format_exc()}")
-                pass
-
-        if success_handler and callable(success_handler):
-            success_handler(args, kwargs)
-        pass
-    except:
-        print_verbose(f"Success Callback Error - {traceback.format_exc()}")
-        pass
-
-
-def exception_type(model, original_exception):
-    try:
-        if isinstance(original_exception, OpenAIError):
-            # Handle the OpenAIError
-            raise original_exception
-        elif model:
-            error_str = str(original_exception)
-            if isinstance(original_exception, BaseException):
-                exception_type = type(original_exception).__name__
-            else:
-                exception_type = ""
-            if "claude" in model: #one of the anthropics
-                if "status_code" in original_exception:
-                    print_verbose(f"status_code: {original_exception.status_code}")
-                    if original_exception.status_code == 401:
-                        raise AuthenticationError(f"AnthropicException - {original_exception.message}")
-                    elif original_exception.status_code == 400:
-                        raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
-                    elif original_exception.status_code == 429:
-                        raise RateLimitError(f"AnthropicException - {original_exception.message}")
-            elif "replicate" in model:
-                if "Incorrect authentication token" in error_str:
-                    raise AuthenticationError(f"ReplicateException - {error_str}")
-                elif exception_type == "ModelError":
-                    raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
-                elif "Request was throttled" in error_str:
-                    raise RateLimitError(f"ReplicateException - {error_str}")
-                elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
-                    raise ServiceUnavailableError(f"ReplicateException - {error_str}")
-            elif model == "command-nightly": #Cohere
-                if "invalid api token" in error_str or "No API key provided." in error_str:
-                    raise AuthenticationError(f"CohereException - {error_str}")
-                elif "too many tokens" in error_str:
-                    raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
-                elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
-                    raise RateLimitError(f"CohereException - {original_exception.message}")
-            raise original_exception # base case - return the original exception
-        else:
-            raise original_exception
-    except:
-        raise original_exception
-
-def safe_crash_reporting(model=None, exception=None, azure=None):
-    data = {
-        "model": model,
-        "exception": str(exception),
-        "azure": azure
-    }
-    threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
-
-def litellm_telemetry(data):
-    # Load or generate the UUID
-    uuid_file = 'litellm_uuid.txt'
-    try:
-        # Try to open the file and load the UUID
-        with open(uuid_file, 'r') as file:
-            uuid_value = file.read()
-            if uuid_value:
-                uuid_value = uuid_value.strip()
-            else:
-                raise FileNotFoundError
-    except FileNotFoundError:
-        # Generate a new UUID if the file doesn't exist or is empty
-        new_uuid = uuid.uuid4()
-        uuid_value = str(new_uuid)
-        with open(uuid_file, 'w') as file:
-            file.write(uuid_value)
-
-    # Prepare the data to send to localhost:3000
-    payload = {
-        'uuid': uuid_value,
-        'data': data
-    }
-    print_verbose(f"payload: {payload}")
-    try:
-        # Make the POST request to localhost:3000
-        response = requests.post('https://litellm.berri.ai/logging', json=payload)
-        response.raise_for_status() # Raise an exception for HTTP errors
-    except requests.exceptions.RequestException as e:
-        # Handle any errors in the request
-        pass
\ No newline at end of file
diff --git a/dist/litellm-0.1.229-py3-none-any.whl b/dist/litellm-0.1.229-py3-none-any.whl
deleted file mode 100644
index 5d4c7ac2c..000000000
Binary files a/dist/litellm-0.1.229-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.229.tar.gz b/dist/litellm-0.1.229.tar.gz
deleted file mode 100644
index a157ac869..000000000
Binary files a/dist/litellm-0.1.229.tar.gz and /dev/null differ
diff --git a/dist/litellm-0.1.2291-py3-none-any.whl b/dist/litellm-0.1.2291-py3-none-any.whl
deleted file mode 100644
index b9128dbd0..000000000
Binary files a/dist/litellm-0.1.2291-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.2291.tar.gz b/dist/litellm-0.1.2291.tar.gz
deleted file mode 100644
index 1a286f7bf..000000000
Binary files a/dist/litellm-0.1.2291.tar.gz and /dev/null differ
diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO
deleted file mode 100644
index f01915a43..000000000
--- a/litellm.egg-info/PKG-INFO
+++ /dev/null
@@ -1,6 +0,0 @@
-Metadata-Version: 2.1
-Name: litellm
-Version: 0.1.2291
-Summary: Library to easily interface with LLM API providers
-Author: BerriAI
-License-File: LICENSE
diff --git a/litellm.egg-info/SOURCES.txt b/litellm.egg-info/SOURCES.txt
deleted file mode 100644
index 88f47e84f..000000000
--- a/litellm.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-LICENSE
-README.md
-pyproject.toml
-setup.py
-litellm/__init__.py
-litellm/main.py
-litellm/timeout.py
-litellm/utils.py
-litellm.egg-info/PKG-INFO
-litellm.egg-info/SOURCES.txt
-litellm.egg-info/dependency_links.txt
-litellm.egg-info/requires.txt
-litellm.egg-info/top_level.txt
-litellm/integrations/__init__.py
-litellm/integrations/helicone.py
\ No newline at end of file
diff --git a/litellm.egg-info/dependency_links.txt b/litellm.egg-info/dependency_links.txt
deleted file mode 100644
index 8b1378917..000000000
--- a/litellm.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/litellm.egg-info/requires.txt b/litellm.egg-info/requires.txt
deleted file mode 100644
index b59e88b89..000000000
--- a/litellm.egg-info/requires.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-openai
-cohere
-pytest
-anthropic
-replicate
-python-dotenv
-openai[datalib]
-tenacity
diff --git a/litellm.egg-info/top_level.txt b/litellm.egg-info/top_level.txt
deleted file mode 100644
index 8e637fbf5..000000000
--- a/litellm.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-litellm
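
Note: with the stale build/, dist/, and litellm.egg-info/ copies removed above, one way to keep future builds from shipping them again is to exclude them at packaging time. The snippet below is only an illustrative sketch, not the project's actual configuration: the repository's real setup.py/pyproject.toml is not part of this diff, and the metadata values here are simply copied from the deleted PKG-INFO and requires.txt.

    # setup.py (hypothetical sketch -- not the repository's actual file)
    from setuptools import setup, find_packages

    setup(
        name="litellm",
        version="0.1.2291",  # version taken from the deleted PKG-INFO
        description="Library to easily interface with LLM API providers",
        author="BerriAI",
        # Ship only the real package; never pick up stale build/ or dist/ copies.
        packages=find_packages(exclude=["build", "build.*", "dist", "dist.*"]),
        install_requires=[
            "openai",
            "cohere",
            "anthropic",
            "replicate",
            "python-dotenv",
            "tenacity",
        ],
    )

Adding build/, dist/, and *.egg-info/ to .gitignore would likewise keep these generated artifacts from being committed again.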