bug fixes and updates

Krrish Dholakia 2023-08-02 13:27:10 -07:00
parent 74ddafb7ad
commit 104b9f21b0
17 changed files with 646 additions and 330 deletions


@@ -1,2 +1,31 @@
__version__ = "1.0.0"
from .main import * # Import all the symbols from main.py
success_callback = []
failure_callback = []
set_verbose = False
telemetry = True
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
'gpt-3.5-turbo',
'gpt-4'
]
open_ai_text_completion_models = [
'text-davinci-003'
]
cohere_models = [
'command-nightly',
]
anthropic_models = [
"claude-2",
"claude-instant-1"
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
from .timeout import timeout
from .utils import client, logging, exception_type # Import helper utilities from utils.py
from .main import * # Import all the symbols from main.py
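# Usage sketch (illustrative, not part of this diff): a consumer toggles these
# module-level settings before calling completion(); routing is driven by the
# model lists above. Assumes the relevant provider/callback env vars are set.
import litellm
from litellm import completion

litellm.set_verbose = True                      # turn on LiteLLM debug prints
litellm.success_callback = ["posthog"]          # assumes POSTHOG_API_KEY / POSTHOG_API_URL are set
litellm.failure_callback = ["slack", "sentry"]  # assumes the Slack / Sentry env vars are set

# "gpt-3.5-turbo" is in open_ai_chat_completion_models, so this routes to OpenAI chat
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey!"}])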


@@ -1,49 +1,77 @@
import os, openai, cohere, replicate, sys
from typing import Any
from func_timeout import func_set_timeout, FunctionTimedOut
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
import json
import traceback
import threading
import dotenv
import subprocess
import litellm
from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
import random
####### ENVIRONMENT VARIABLES ###################
# Loading env variables using dotenv
dotenv.load_dotenv()
set_verbose = False
####### COMPLETION MODELS ###################
open_ai_chat_completion_models = [
'gpt-3.5-turbo',
'gpt-4'
]
open_ai_text_completion_models = [
'text-davinci-003'
]
cohere_models = [
'command-nightly',
]
anthropic_models = [
"claude-2",
"claude-instant-1"
]
####### EMBEDDING MODELS ###################
open_ai_embedding_models = [
'text-embedding-ada-002'
]
#############################################
dotenv.load_dotenv() # Loading env variables using dotenv
def get_optional_params(
# 12 optional params
functions = [],
function_call = "",
temperature = 1,
top_p = 1,
n = 1,
stream = False,
stop = None,
max_tokens = float('inf'),
presence_penalty = 0,
frequency_penalty = 0,
logit_bias = {},
user = "",
):
optional_params = {}
if functions != []:
optional_params["functions"] = functions
if function_call != "":
optional_params["function_call"] = function_call
if temperature != 1:
optional_params["temperature"] = temperature
if top_p != 1:
optional_params["top_p"] = top_p
if n != 1:
optional_params["n"] = n
if stream:
optional_params["stream"] = stream
if stop != None:
optional_params["stop"] = stop
if max_tokens != float('inf'):
optional_params["max_tokens"] = max_tokens
if presence_penalty != 0:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty != 0:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias != {}:
optional_params["logit_bias"] = logit_bias
if user != "":
optional_params["user"] = user
return optional_params
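# Quick sketch of the contract: only params that differ from their defaults are
# forwarded, keeping the provider payload minimal.
params = get_optional_params(temperature=0.2, stream=True)
assert params == {"temperature": 0.2, "stream": True}  # top_p=1, n=1, etc. are dropped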
####### COMPLETION ENDPOINTS ################
#############################################
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
@client
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
def completion(
model, messages, # required params
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
functions=[], function_call="", # optional params
temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
presence_penalty=0, frequency_penalty=0, logit_bias={}, user="",
# Optional liteLLM function params
*, force_timeout=60, azure=False, logger_fn=None, verbose=False
):
try:
# check if user passed in any of the OpenAI optional params
optional_params = get_optional_params(
functions=functions, function_call=function_call,
temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
)
if azure == True:
# azure configs
openai.api_type = "azure"
@@ -51,21 +79,49 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
engine=model,
messages = messages
messages = messages,
**optional_params
)
elif "replicate" in model:
elif model in litellm.open_ai_chat_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.ChatCompletion.create(
model=model,
messages = messages,
**optional_params
)
elif model in litellm.open_ai_text_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
prompt = " ".join([message["content"] for message in messages])
## LOGGING
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
## COMPLETION CALL
response = openai.Completion.create(
model=model,
prompt = prompt
)
elif "replicate" in model:
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
# checking in case user set it to REPLICATE_API_KEY instead
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
prompt = " ".join([message["content"] for message in messages])
input = [{"prompt": prompt}]
if max_tokens:
input = {"prompt": prompt}
if max_tokens != float('inf'):
input["max_length"] = max_tokens # for t5 models
input["max_new_tokens"] = max_tokens # for llama2 models
## LOGGING
@@ -90,7 +146,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
]
}
response = new_response
elif model in anthropic_models:
elif model in litellm.anthropic_models:
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
prompt = f"{HUMAN_PROMPT}"
for message in messages:
@@ -103,9 +159,10 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
prompt += f"{HUMAN_PROMPT}{message['content']}"
prompt += f"{AI_PROMPT}"
anthropic = Anthropic()
if max_tokens:
# check if user passed in max_tokens != float('inf')
if max_tokens != float('inf'):
max_tokens_to_sample = max_tokens
else:
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
## LOGGING
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
@@ -127,9 +184,9 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
}
]
}
print(f"new response: {new_response}")
print_verbose(f"new response: {new_response}")
response = new_response
elif model in cohere_models:
elif model in litellm.cohere_models:
cohere_key = os.environ.get("COHERE_API_KEY")
co = cohere.Client(cohere_key)
prompt = " ".join([message["content"] for message in messages])
@@ -146,7 +203,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
"finish_reason": "stop",
"index": 0,
"message": {
"content": response[0],
"content": response[0].text,
"role": "assistant"
}
}
@@ -154,7 +211,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
}
response = new_response
elif model in open_ai_chat_completion_models:
elif model in litellm.open_ai_chat_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
@@ -166,7 +223,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
model=model,
messages = messages
)
elif model in open_ai_text_completion_models:
elif model in litellm.open_ai_text_completion_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
@@ -181,249 +238,59 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
)
else:
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
args = locals()
raise ValueError(f"No valid completion model args passed in - {args}")
return response
except Exception as e:
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
raise e
# log the original exception
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
## Map to OpenAI Exception
raise exception_type(model=model, original_exception=e)
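# Illustrative call against the new signature (assumes ANTHROPIC_API_KEY is set):
# force_timeout overrides the decorator's 60s default, and any provider failure
# is re-raised as an OpenAI exception type via exception_type().
from openai.error import OpenAIError

try:
    response = completion(
        model="claude-2",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        max_tokens=100,
        force_timeout=120,
    )
except OpenAIError as e:
    print(f"mapped provider error: {e}")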
### EMBEDDING ENDPOINTS ####################
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
response = None
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, engine=model)
print_verbose(f"response_value: {str(response)[:50]}")
elif model in open_ai_embedding_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, model=model)
print_verbose(f"response_value: {str(response)[:50]}")
else:
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
return response
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
class litellm_client:
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
set_verbose = verbose
self.success_callback = success_callback
self.failure_callback = failure_callback
self.logger_fn = None # if user passes in their own logging function
self.callback_list = list(set(self.success_callback + self.failure_callback))
self.set_callbacks()
## COMPLETION CALL
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
## EMBEDDING CALL
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
try:
self.logger_fn = logger_fn
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
my_thread.start()
return response
except Exception as e:
args = locals() # get all the param values
self.handle_failure(e, args)
raise e
def set_callbacks(self): #instantiate any external packages
for callback in self.callback_list: # only install what's required
if callback == "sentry":
try:
import sentry_sdk
except ImportError:
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
self.sentry_sdk = sentry_sdk
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
self.capture_exception = self.sentry_sdk.capture_exception
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
elif callback == "posthog":
try:
from posthog import Posthog
except:
print_verbose("Package 'posthog' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
from posthog import Posthog
self.posthog = Posthog(
project_api_key=os.environ.get("POSTHOG_API_KEY"),
host=os.environ.get("POSTHOG_API_URL"))
elif callback == "slack":
try:
from slack_bolt import App
except ImportError:
print_verbose("Package 'slack_bolt' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
from slack_bolt import App
self.slack_app = App(
token=os.environ.get("SLACK_API_TOKEN"),
signing_secret=os.environ.get("SLACK_API_SECRET")
)
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
def handle_input(self, model_call_details={}):
if len(model_call_details.keys()) > 0:
model = model_call_details["model"] if "model" in model_call_details else None
if model:
for callback in self.callback_list:
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
self.add_breadcrumb(
category=f'{model}',
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
level='info',
)
if self.logger_fn and callable(self.logger_fn):
self.logger_fn(model_call_details)
pass
def handle_success(self, model, messages, additional_details):
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["litellm_model"] = str(model)
additional_details["litellm_messages"] = str(messages)
for callback in self.success_callback:
try:
if callback == "posthog":
ph_obj = {}
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.success_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
pass
elif callback == "slack":
slack_msg = ""
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Successful call"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
except:
pass
if success_handler and callable(success_handler):
call_details = {
"model": model,
"messages": messages,
"additional_details": additional_details
}
success_handler(call_details)
pass
def handle_failure(self, exception, args):
args.pop("self")
additional_details = args.pop("additional_details", {})
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
for callback in self.failure_callback:
try:
if callback == "slack":
slack_msg = ""
for param in args:
slack_msg += f"{param}: {args[param]}\n"
if len(additional_details.keys()) > 0:
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Traceback: {traceback.format_exc()}"
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
elif callback == "sentry":
self.capture_exception(exception)
elif callback == "posthog":
if len(additional_details.keys()) > 0:
ph_obj = {}
for param in args:
ph_obj[param] += args[param]
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
if "user_id" in additional_details:
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
else:
self.posthog.capture(event_name, ph_obj)
else:
pass
except:
print(f"got an error calling {callback} - {traceback.format_exc()}")
if failure_handler and callable(failure_handler):
call_details = {
"exception": exception,
"additional_details": additional_details
}
failure_handler(call_details)
pass
####### HELPER FUNCTIONS ################
#Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
@client
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
try:
model_call_details = {}
model_call_details["model"] = model
model_call_details["input"] = input
model_call_details["azure"] = azure
model_call_details["additional_args"] = additional_args
if logger_fn and callable(logger_fn):
try:
# log additional call details -> api key, etc.
if azure == True or model in open_ai_chat_completion_models or model in open_ai_text_completion_models or model in open_ai_embedding_models:
model_call_details["api_type"] = openai.api_type
model_call_details["api_base"] = openai.api_base
model_call_details["api_version"] = openai.api_version
model_call_details["api_key"] = openai.api_key
elif "replicate" in model:
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
elif model in anthropic_models:
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
elif model in cohere_models:
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
except:
print_verbose(f"Basic model call details: {model_call_details}")
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
pass
else:
print_verbose(f"Basic model call details: {model_call_details}")
pass
except:
pass
## Set verbose to true -> ```litellm.verbose = True```
response = None
if azure == True:
# azure configs
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_API_BASE")
openai.api_version = os.environ.get("AZURE_API_VERSION")
openai.api_key = os.environ.get("AZURE_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, engine=model)
print_verbose(f"response_value: {str(response)[:50]}")
elif model in litellm.open_ai_embedding_models:
openai.api_type = "openai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = os.environ.get("OPENAI_API_KEY")
## LOGGING
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
## EMBEDDING CALL
response = openai.Embedding.create(input=input, model=model)
print_verbose(f"response_value: {str(response)[:50]}")
else:
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
args = locals()
raise ValueError(f"No valid embedding model args passed in - {args}")
return response
except Exception as e:
# log the original exception
logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
## Map to OpenAI Exception
raise exception_type(model=model, original_exception=e)
####### HELPER FUNCTIONS ################
## Set verbose to true -> ```litellm.set_verbose = True```
def print_verbose(print_statement):
if set_verbose:
if litellm.set_verbose:
print(f"LiteLLM: {print_statement}")
print("Get help - https://discord.com/invite/wuPM9dRgDw")
if random.random() <= 0.3:
print("Get help - https://discord.com/invite/wuPM9dRgDw")


@@ -0,0 +1,80 @@
"""
Module containing "timeout" decorator for sync and async callables.
"""
import asyncio
from concurrent import futures
from inspect import iscoroutinefunction
from functools import wraps
from threading import Thread
from openai.error import Timeout
def timeout(
timeout_duration: float = None, exception_to_raise = Timeout
):
"""
Wraps a function to raise the specified exception if execution time
is greater than the specified timeout.
Works with both synchronous and asynchronous callables, though synchronous ones
incur some overhead because the backend uses threads and asyncio.
:param float timeout_duration: Timeout duration in seconds. If None, the callable won't time out.
:param OpenAIError exception_to_raise: Exception to raise when the callable times out.
Defaults to openai.error.Timeout.
:return: The decorated function.
:rtype: callable
"""
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
async def async_func():
return func(*args, **kwargs)
thread = _LoopWrapper()
thread.start()
future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
try:
local_timeout_duration = timeout_duration
if "force_timeout" in kwargs:
local_timeout_duration = kwargs["force_timeout"]
result = future.result(timeout=local_timeout_duration)
except futures.TimeoutError:
thread.stop_loop()
raise exception_to_raise()
thread.stop_loop()
return result
@wraps(func)
async def async_wrapper(*args, **kwargs):
try:
value = await asyncio.wait_for(
func(*args, **kwargs), timeout=timeout_duration
)
return value
except asyncio.TimeoutError:
raise exception_to_raise()
if iscoroutinefunction(func):
return async_wrapper
return wrapper
return decorator
class _LoopWrapper(Thread):
def __init__(self):
super().__init__(daemon=True)
self.loop = asyncio.new_event_loop()
def run(self) -> None:
self.loop.run_forever()
self.loop.call_soon_threadsafe(self.loop.close)
def stop_loop(self):
for task in asyncio.all_tasks(self.loop):
task.cancel()
self.loop.call_soon_threadsafe(self.loop.stop)
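# Usage sketch for the decorator. `slow_call` is a hypothetical function; note
# the sync wrapper forwards **kwargs to the wrapped callable, so it should
# accept (or absorb) a force_timeout keyword.
import time
from openai.error import Timeout
from litellm import timeout  # re-exported by the __init__.py above

@timeout(2)  # raise openai.error.Timeout once execution passes 2 seconds
def slow_call(force_timeout=None):
    time.sleep(5)
    return "done"

try:
    slow_call(force_timeout=1)  # per-call override, read out of kwargs by the wrapper
except Timeout:
    print("timed out")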

build/lib/litellm/utils.py Normal file

@@ -0,0 +1,316 @@
import dotenv, json, traceback, threading
import subprocess, os, sys  # sys is needed for sys.executable in set_callbacks
import litellm, openai
import random, uuid, requests
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
sentry_sdk_instance = None
capture_exception = None
add_breadcrumb = None
posthog = None
slack_app = None
alerts_channel = None
callback_list = []
user_logger_fn = None
additional_details = {}
def print_verbose(print_statement):
if litellm.set_verbose:
print(f"LiteLLM: {print_statement}")
if random.random() <= 0.3:
print("Get help - https://discord.com/invite/wuPM9dRgDw")
####### LOGGING ###################
#Logging function -> log the exact model details + what's being sent | Non-Blocking
def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
try:
model_call_details = {}
model_call_details["model"] = model
model_call_details["azure"] = azure
# log exception details
if exception:
model_call_details["original_exception"] = exception
if litellm.telemetry:
safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
model_call_details["input"] = input
# log additional call details -> api key, etc.
if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_text_completion_models or model in litellm.open_ai_embedding_models:
model_call_details["api_type"] = openai.api_type
model_call_details["api_base"] = openai.api_base
model_call_details["api_version"] = openai.api_version
model_call_details["api_key"] = openai.api_key
elif "replicate" in model:
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
elif model in litellm.anthropic_models:
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
elif model in litellm.cohere_models:
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
model_call_details["additional_args"] = additional_args
## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
print_verbose(f"Basic model call details: {model_call_details}")
if logger_fn and callable(logger_fn):
try:
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
except:
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
except:
traceback.print_exc()
pass
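# Any callable that accepts a single dict works as logger_fn; a minimal
# hypothetical hook, passed through completion()/embedding() via the
# logger_fn kwarg:
def my_logger_fn(model_call_details):
    # receives model, input, azure flag, api details and additional_args
    print(f"[my app] calling {model_call_details['model']}")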
####### CLIENT ###################
# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
def client(original_function):
def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
try:
global callback_list, add_breadcrumb
if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
callback_list = list(set(litellm.success_callback + litellm.failure_callback))
set_callbacks(callback_list=callback_list)
if add_breadcrumb:
add_breadcrumb(
category="litellm.llm_call",
message=f"Positional Args: {args}, Keyword Args: {kwargs}",
level="info",
)
except: # DO NOT BLOCK running the function because of this
print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
pass
def wrapper(*args, **kwargs):
try:
function_setup(args, kwargs)
## MODEL CALL
result = original_function(*args, **kwargs)
## LOG SUCCESS
my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
my_thread.start()
return result
except Exception as e:
traceback_exception = traceback.format_exc()
my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
my_thread.start()
raise e
return wrapper
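# Toy sketch of the wrapper flow; `fake_llm_call` is hypothetical. On success,
# handle_success(args, kwargs) runs on a background thread; on failure,
# handle_failure(...) fires and the original exception is re-raised.
@client
def fake_llm_call(model, messages):
    return {"choices": [{"message": {"role": "assistant", "content": "hi"}}]}

result = fake_llm_call("gpt-3.5-turbo", [{"role": "user", "content": "hi"}])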
####### HELPER FUNCTIONS ################
def set_callbacks(callback_list):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
try:
for callback in callback_list:
if callback == "sentry":
try:
import sentry_sdk
except ImportError:
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
sentry_sdk_instance = sentry_sdk
sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
capture_exception = sentry_sdk_instance.capture_exception
add_breadcrumb = sentry_sdk_instance.add_breadcrumb
elif callback == "posthog":
try:
from posthog import Posthog
except ImportError:
print_verbose("Package 'posthog' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
from posthog import Posthog
posthog = Posthog(
project_api_key=os.environ.get("POSTHOG_API_KEY"),
host=os.environ.get("POSTHOG_API_URL"))
elif callback == "slack":
try:
from slack_bolt import App
except ImportError:
print_verbose("Package 'slack_bolt' is missing. Installing it...")
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
from slack_bolt import App
slack_app = App(
token=os.environ.get("SLACK_API_TOKEN"),
signing_secret=os.environ.get("SLACK_API_SECRET")
)
alerts_channel = os.environ["SLACK_API_CHANNEL"]
print_verbose(f"Initialized Slack App: {slack_app}")
except:
pass
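# Configuration each callback expects, collected from set_callbacks above
# (placeholder values; set these however your deployment manages secrets):
import os
os.environ["SENTRY_API_URL"] = "<sentry-dsn>"
os.environ["SENTRY_API_TRACE_RATE"] = "1.0"
os.environ["POSTHOG_API_KEY"] = "<posthog-key>"
os.environ["POSTHOG_API_URL"] = "<posthog-host>"
os.environ["SLACK_API_TOKEN"] = "<bot-token>"
os.environ["SLACK_API_SECRET"] = "<signing-secret>"
os.environ["SLACK_API_CHANNEL"] = "<channel-id>"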
def handle_failure(exception, traceback_exception, args, kwargs):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
try:
print_verbose(f"handle_failure args: {args}")
print_verbose(f"handle_failure kwargs: {kwargs}")
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
print_verbose(f"self.failure_callback: {litellm.failure_callback}")
print_verbose(f"additional_details: {additional_details}")
for callback in litellm.failure_callback:
try:
if callback == "slack":
slack_msg = ""
if len(kwargs) > 0:
for key in kwargs:
slack_msg += f"{key}: {kwargs[key]}\n"
if len(args) > 0:
for i, arg in enumerate(args):
slack_msg += f"LiteLLM_Args_{str(i)}: {arg}"
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_msg += f"Traceback: {traceback_exception}"
slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
elif callback == "sentry":
capture_exception(exception)
elif callback == "posthog":
print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
ph_obj = {}
if len(kwargs) > 0:
ph_obj = kwargs
if len(args) > 0:
for i, arg in enumerate(args):
ph_obj["litellm_args_" + str(i)] = arg
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["Event_Name"]
print_verbose(f"ph_obj: {ph_obj}")
print_verbose(f"PostHog Event Name: {event_name}")
if "user_id" in additional_details:
posthog.capture(additional_details["user_id"], event_name, ph_obj)
else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
unique_id = str(uuid.uuid4())
posthog.capture(unique_id, event_name)
print_verbose(f"successfully logged to PostHog!")
except:
print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
pass
if failure_handler and callable(failure_handler):
call_details = {
"exception": exception,
"additional_details": additional_details
}
failure_handler(call_details)
pass
except:
pass
def handle_success(*args, **kwargs):
try:
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
for callback in litellm.success_callback:
try:
if callback == "posthog":
ph_obj = {}
for detail in additional_details:
ph_obj[detail] = additional_details[detail]
event_name = additional_details["Event_Name"]
if "user_id" in additional_details:
posthog.capture(additional_details["user_id"], event_name, ph_obj)
else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
unique_id = str(uuid.uuid4())
posthog.capture(unique_id, event_name, ph_obj)
pass
elif callback == "slack":
slack_msg = ""
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
except:
pass
if success_handler and callable(success_handler):
success_handler(args, kwargs)
pass
except:
pass
def exception_type(model, original_exception):
try:
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
else:
exception_type = ""
if "claude" in model: #one of the anthropics
if "status_code" in original_exception:
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
raise original_exception
except:
raise original_exception
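# Downstream, callers can handle every provider through the OpenAI error types;
# an illustrative pattern:
from litellm import completion
from openai.error import AuthenticationError, RateLimitError

try:
    response = completion(model="claude-2", messages=[{"role": "user", "content": "Hey!"}])
except AuthenticationError:
    pass  # a missing/invalid ANTHROPIC_API_KEY surfaces as an OpenAI-style auth error
except RateLimitError:
    pass  # Anthropic 429s (and Cohere connection errors) are mapped here too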
def safe_crash_reporting(model=None, exception=None, azure=None):
data = {
"model": model,
"exception": str(exception),
"azure": azure
}
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
def litellm_telemetry(data):
# Load or generate the UUID
uuid_file = 'litellm_uuid.txt'
try:
# Try to open the file and load the UUID
with open(uuid_file, 'r') as file:
uuid_value = file.read()
if uuid_value:
uuid_value = uuid_value.strip()
else:
raise FileNotFoundError
except FileNotFoundError:
# Generate a new UUID if the file doesn't exist or is empty
new_uuid = uuid.uuid4()
uuid_value = str(new_uuid)
with open(uuid_file, 'w') as file:
file.write(uuid_value)
# Prepare the data to send to localhost:3000
payload = {
'uuid': uuid_value,
'data': data
}
print_verbose(f"payload: {payload}")
try:
# Make the POST request to localhost:3000
response = requests.post('https://litellm.berri.ai/logging', json=payload)
response.raise_for_status() # Raise an exception for HTTP errors
except requests.exceptions.RequestException as e:
# Handle any errors in the request
pass
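# Telemetry is on by default (litellm.telemetry = True in __init__.py) and only
# fires from logging() when an exception is recorded; opting out is one flag:
import litellm
litellm.telemetry = False  # logging() then skips safe_crash_reporting() entirely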

Binary file not shown.

Binary file not shown.

dist/litellm-0.1.216-py3-none-any.whl vendored Normal file

Binary file not shown.

dist/litellm-0.1.216.tar.gz vendored Normal file

Binary file not shown.


@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: litellm
Version: 0.1.207
Version: 0.1.216
Summary: Library to easily interface with LLM API providers
Author: BerriAI
License-File: LICENSE


@@ -1,5 +1,6 @@
LICENSE
README.md
pyproject.toml
setup.py
litellm/__init__.py
litellm/main.py


@@ -57,3 +57,4 @@ def test_good_azure_embedding():
print(f"response: {str(response)[:50]}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")


@@ -0,0 +1,23 @@
#### What this tests ####
# This tests error logging (with custom user functions) for the `completion` + `embedding` endpoints without callbacks (i.e. slack, posthog, etc. not set)
# Requirements: remove any env keys you have related to slack/posthog/etc., and remove your anthropic api key (to force an exception)
import sys, os
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.set_verbose = True
model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
for model in model_fallback_list:
try:
response = embedding(model="text-embedding-ada-002", input=[user_message])
response = completion(model=model, messages=messages)
except Exception as e:
print(f"error occurred: {traceback.format_exc()}")


@@ -234,41 +234,45 @@ def handle_success(*args, **kwargs):
def exception_type(model, original_exception):
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
try:
if isinstance(original_exception, OpenAIError):
# Handle the OpenAIError
raise original_exception
elif model:
error_str = str(original_exception)
if isinstance(original_exception, BaseException):
exception_type = type(original_exception).__name__
else:
exception_type = ""
if "claude" in model: #one of the anthropics
if "status_code" in original_exception:
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
exception_type = ""
if "claude" in model: #one of the anthropics
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
elif original_exception.status_code == 400:
raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
elif original_exception.status_code == 429:
raise RateLimitError(f"AnthropicException - {original_exception.message}")
elif "replicate" in model:
if "Incorrect authentication token" in error_str:
raise AuthenticationError(f"ReplicateException - {error_str}")
elif exception_type == "ModelError":
raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
elif "Request was throttled" in error_str:
raise RateLimitError(f"ReplicateException - {error_str}")
elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
raise ServiceUnavailableError(f"ReplicateException - {error_str}")
elif model == "command-nightly": #Cohere
if "invalid api token" in error_str or "No API key provided." in error_str:
raise AuthenticationError(f"CohereException - {error_str}")
elif "too many tokens" in error_str:
raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
raise RateLimitError(f"CohereException - {original_exception.message}")
raise original_exception # base case - re-raise the original exception
else:
raise original_exception
except:
raise original_exception
def safe_crash_reporting(model=None, exception=None, azure=None):
@@ -277,11 +281,9 @@ def safe_crash_reporting(model=None, exception=None, azure=None):
"exception": str(exception),
"azure": azure
}
print(f"data in crash reporting: {data}")
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
def litellm_telemetry(data):
print(f"data in in litellm telemetry: {data}")
# Load or generate the UUID
uuid_file = 'litellm_uuid.txt'
try:
@@ -290,7 +292,6 @@ def litellm_telemetry(data):
uuid_value = file.read()
if uuid_value:
uuid_value = uuid_value.strip()
print(f"Loaded UUID: {uuid_value}")
else:
raise FileNotFoundError
except FileNotFoundError:
@@ -299,7 +300,6 @@ def litellm_telemetry(data):
uuid_value = str(new_uuid)
with open(uuid_file, 'w') as file:
file.write(uuid_value)
print(f"Generated and stored UUID: {uuid_value}")
# Prepare the data to send to localhost:3000
payload = {
@@ -311,7 +311,6 @@ def litellm_telemetry(data):
# Make the POST request to localhost:3000
response = requests.post('https://litellm.berri.ai/logging', json=payload)
response.raise_for_status() # Raise an exception for HTTP errors
print('Request successfully sent!')
except requests.exceptions.RequestException as e:
# Handle any errors in the request
print(f'Error: {e}')
pass

View file

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='litellm',
version='0.1.214',
version='0.1.216',
description='Library to easily interface with LLM API providers',
author='BerriAI',
packages=[