diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 80675f031a..f652890678 100644 Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ diff --git a/litellm/main.py b/litellm/main.py index d4fc60053a..c301a1eb07 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -8,10 +8,28 @@ import threading import dotenv import traceback import subprocess +import uuid + ####### ENVIRONMENT VARIABLES ################### -# Loading env variables using dotenv -dotenv.load_dotenv() +dotenv.load_dotenv() # Loading env variables using dotenv set_verbose = False +sentry_sdk_instance = None +capture_exception = None +add_breadcrumb = None +posthog = None +slack_app = None +alerts_channel = None +success_callback = [] +failure_callback = [] +callback_list = [] +user_logger_fn = None +additional_details = {} + +## Set verbose to true -> ```litellm.verbose = True``` +def print_verbose(print_statement): + if set_verbose: + print(f"LiteLLM: {print_statement}") + print("Get help - https://discord.com/invite/wuPM9dRgDw") ####### COMPLETION MODELS ################### open_ai_chat_completion_models = [ @@ -36,13 +54,39 @@ open_ai_embedding_models = [ 'text-embedding-ada-002' ] -############################################# +####### CLIENT ################### make it easy to log completion/embedding runs +def client(original_function): + def function_setup(): #just run once to check if user wants to send their data anywhere + try: + if len(success_callback) > 0 or len(failure_callback) > 0 and len(callback_list) == 0: + callback_list = list(set(success_callback + failure_callback)) + set_callbacks(callback_list=callback_list) + except: # DO NOT BLOCK running the function because of this + print_verbose(f"[Non-Blocking] {traceback.format_exc()}") + pass + def wrapper(*args, **kwargs): + # Code to be executed before the embedding function + try: + function_setup() + ## EMBEDDING CALL + result = original_function(*args, **kwargs) + ## LOG SUCCESS + my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread + my_thread.start() + return result + except Exception as e: + traceback_exception = traceback.format_exc() + my_thread = threading.Thread(target=handle_failure, args=(e, traceback.format_exc(), args, kwargs)) # don't interrupt execution of main thread + my_thread.start() + raise e + return wrapper ####### COMPLETION ENDPOINTS ################ ############################################# -@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure) -def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None): +@client +@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure) +def completion(model, messages, max_tokens=None, *, forceTimeout=60, azure=False, logger_fn=None): # ,*,.. requires optional params like forceTimeout, azure and logger_fn to be passed in as keyword arguments try: if azure == True: # azure configs @@ -64,7 +108,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l replicate_api_token = os.environ.get("REPLICATE_API_KEY") os.environ["REPLICATE_API_TOKEN"] = replicate_api_token prompt = " ".join([message["content"] for message in messages]) - input = [{"prompt": prompt}] + input = {"prompt": prompt} if max_tokens: input["max_length"] = max_tokens # for t5 models input["max_new_tokens"] = max_tokens # for llama2 models @@ -127,7 +171,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l } ] } - print(f"new response: {new_response}") + print_verbose(f"new response: {new_response}") response = new_response elif model in cohere_models: cohere_key = os.environ.get("COHERE_API_KEY") @@ -188,6 +232,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l ### EMBEDDING ENDPOINTS #################### +@client @func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None): response = None @@ -214,170 +259,104 @@ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None): print_verbose(f"response_value: {str(response)[:50]}") else: logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + args = locals() + raise ValueError(f"No valid embedding model args passed in - {args}") return response -### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc. -class litellm_client: - def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor - set_verbose = verbose - self.success_callback = success_callback - self.failure_callback = failure_callback - self.logger_fn = None # if user passes in their own logging function - self.callback_list = list(set(self.success_callback + self.failure_callback)) - self.set_callbacks() - - ## COMPLETION CALL - def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any: - try: - self.logger_fn = logger_fn - response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input) - my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread - my_thread.start() - return response - except Exception as e: - args = locals() # get all the param values - self.handle_failure(e, args) - raise e +####### HELPER FUNCTIONS ################ - ## EMBEDDING CALL - def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any: - try: - self.logger_fn = logger_fn - response = embedding(model, input, azure=azure, logger_fn=self.handle_input) - my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread - my_thread.start() - return response - except Exception as e: - args = locals() # get all the param values - self.handle_failure(e, args) - raise e - - - def set_callbacks(self): #instantiate any external packages - for callback in self.callback_list: # only install what's required - if callback == "sentry": - try: +def set_callbacks(callback_list): + global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel + for callback in callback_list: + if callback == "sentry": + try: import sentry_sdk - except ImportError: + except ImportError: print_verbose("Package 'sentry_sdk' is missing. Installing it...") subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk']) import sentry_sdk - self.sentry_sdk = sentry_sdk - self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) - self.capture_exception = self.sentry_sdk.capture_exception - self.add_breadcrumb = self.sentry_sdk.add_breadcrumb - elif callback == "posthog": - try: + sentry_sdk_instance = sentry_sdk + sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) + capture_exception = sentry_sdk_instance.capture_exception + add_breadcrumb = sentry_sdk_instance.add_breadcrumb + elif callback == "posthog": + try: from posthog import Posthog - except: + except ImportError: print_verbose("Package 'posthog' is missing. Installing it...") subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog']) from posthog import Posthog - self.posthog = Posthog( - project_api_key=os.environ.get("POSTHOG_API_KEY"), - host=os.environ.get("POSTHOG_API_URL")) - elif callback == "slack": - try: + posthog = Posthog( + project_api_key=os.environ.get("POSTHOG_API_KEY"), + host=os.environ.get("POSTHOG_API_URL")) + elif callback == "slack": + try: from slack_bolt import App - except ImportError: + except ImportError: print_verbose("Package 'slack_bolt' is missing. Installing it...") subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt']) from slack_bolt import App - self.slack_app = App( - token=os.environ.get("SLACK_API_TOKEN"), - signing_secret=os.environ.get("SLACK_API_SECRET") - ) - self.alerts_channel = os.environ["SLACK_API_CHANNEL"] + slack_app = App( + token=os.environ.get("SLACK_API_TOKEN"), + signing_secret=os.environ.get("SLACK_API_SECRET") + ) + alerts_channel = os.environ["SLACK_API_CHANNEL"] + print_verbose(f"Initialized Slack App: {slack_app}") - def handle_input(self, model_call_details={}): - if len(model_call_details.keys()) > 0: - model = model_call_details["model"] if "model" in model_call_details else None - if model: - for callback in self.callback_list: - if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration - self.add_breadcrumb( - category=f'{model}', - message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)), - level='info', - ) - if self.logger_fn and callable(self.logger_fn): - self.logger_fn(model_call_details) - pass - def handle_success(self, model, messages, additional_details): - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - additional_details["litellm_model"] = str(model) - additional_details["litellm_messages"] = str(messages) - for callback in self.success_callback: - try: - if callback == "posthog": - ph_obj = {} - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query" - if "user_id" in additional_details: - self.posthog.capture(additional_details["user_id"], event_name, ph_obj) - else: - self.posthog.capture(event_name, ph_obj) - pass - elif callback == "slack": - slack_msg = "" - if len(additional_details.keys()) > 0: - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Successful call" - self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) - except: - pass +def handle_failure(exception, traceback_exception, args, kwargs): + print_verbose(f"handle_failure args: {args}") + print_verbose(f"handle_failure kwargs: {kwargs}") - if success_handler and callable(success_handler): - call_details = { - "model": model, - "messages": messages, - "additional_details": additional_details - } - success_handler(call_details) - pass - - def handle_failure(self, exception, args): - args.pop("self") - additional_details = args.pop("additional_details", {}) - success_handler = additional_details.pop("success_handler", None) failure_handler = additional_details.pop("failure_handler", None) + + additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query") + print_verbose(f"self.failure_callback: {failure_callback}") - for callback in self.failure_callback: + print_verbose(f"additional_details: {additional_details}") + for callback in failure_callback: try: if callback == "slack": slack_msg = "" - for param in args: - slack_msg += f"{param}: {args[param]}\n" - if len(additional_details.keys()) > 0: - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Traceback: {traceback.format_exc()}" - self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) + if len(kwargs) > 0: + for key in kwargs: + slack_msg += f"{key}: {kwargs[key]}\n" + if len(args) > 0: + for i, arg in enumerate(args): + slack_msg += f"LiteLLM_Args_{str(i)}: {arg}" + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Traceback: {traceback_exception}" + print_verbose(f"This is the slack message: {slack_msg}") + slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg) elif callback == "sentry": - self.capture_exception(exception) - elif callback == "posthog": - if len(additional_details.keys()) > 0: - ph_obj = {} - for param in args: - ph_obj[param] += args[param] - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query" - if "user_id" in additional_details: - self.posthog.capture(additional_details["user_id"], event_name, ph_obj) - else: - self.posthog.capture(event_name, ph_obj) - else: - pass + capture_exception(exception) + elif callback == "posthog": + print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}") + ph_obj = {} + if len(kwargs) > 0: + ph_obj = kwargs + if len(args) > 0: + for i, arg in enumerate(args): + ph_obj["litellm_args_" + str(i)] = arg + print_verbose(f"ph_obj: {ph_obj}") + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["Event_Name"] + print_verbose(f"PostHog Event Name: {event_name}") + if "user_id" in additional_details: + posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python + print(f"ph_obj: {ph_obj})") + unique_id = str(uuid.uuid4()) + posthog.capture(unique_id, event_name) + print_verbose(f"successfully logged to PostHog!") except: - print(f"got an error calling {callback} - {traceback.format_exc()}") + print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}") + pass if failure_handler and callable(failure_handler): call_details = { @@ -386,7 +365,51 @@ class litellm_client: } failure_handler(call_details) pass -####### HELPER FUNCTIONS ################ + + +def handle_input(model_call_details={}): + if len(model_call_details.keys()) > 0: + model = model_call_details["model"] if "model" in model_call_details else None + if model: + for callback in callback_list: + if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration + add_breadcrumb( + category=f'{model}', + message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)), + level='info', + ) + if user_logger_fn and callable(user_logger_fn): + user_logger_fn(model_call_details) + pass + +def handle_success(*args, **kwargs): + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query") + for callback in success_callback: + try: + if callback == "posthog": + ph_obj = {} + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["Event_Name"] + if "user_id" in additional_details: + posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python + unique_id = str(uuid.uuid4()) + posthog.capture(unique_id, event_name, ph_obj) + pass + elif callback == "slack": + slack_msg = "" + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg) + except: + pass + + if success_handler and callable(success_handler): + success_handler(args, kwargs) + pass #Logging function -> log the exact model details + what's being sent | Non-Blocking def logging(model, input, azure=False, additional_args={}, logger_fn=None): @@ -395,35 +418,26 @@ def logging(model, input, azure=False, additional_args={}, logger_fn=None): model_call_details["model"] = model model_call_details["input"] = input model_call_details["azure"] = azure + # log additional call details -> api key, etc. + if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models: + model_call_details["api_type"] = openai.api_type + model_call_details["api_base"] = openai.api_base + model_call_details["api_version"] = openai.api_version + model_call_details["api_key"] = openai.api_key + elif "replicate" in model: + model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") + elif model in anthropic_models: + model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") + elif model in cohere_models: + model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") model_call_details["additional_args"] = additional_args + ## Logging + print_verbose(f"Basic model call details: {model_call_details}") if logger_fn and callable(logger_fn): try: - # log additional call details -> api key, etc. - if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models: - model_call_details["api_type"] = openai.api_type - model_call_details["api_base"] = openai.api_base - model_call_details["api_version"] = openai.api_version - model_call_details["api_key"] = openai.api_key - elif "replicate" in model: - model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") - elif model in anthropic_models: - model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") - elif model in cohere_models: - model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") - logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object except: - print_verbose(f"Basic model call details: {model_call_details}") print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}") pass - else: - print_verbose(f"Basic model call details: {model_call_details}") - pass except: pass - -## Set verbose to true -> ```litellm.verbose = True``` -def print_verbose(print_statement): - if set_verbose: - print(f"LiteLLM: {print_statement}") - print("Get help - https://discord.com/invite/wuPM9dRgDw") \ No newline at end of file diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py index 2b2e4bbcf5..8e06b15e9d 100644 --- a/litellm/tests/test_bad_params.py +++ b/litellm/tests/test_bad_params.py @@ -2,10 +2,9 @@ import sys, os import traceback sys.path.append('..') # Adds the parent directory to the system path import main -from main import litellm_client -client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True) -completion = client.completion -embedding = client.embedding +from main import embedding, completion +main.success_callback = ["posthog"] +main.failure_callback = ["slack", "sentry", "posthog"] main.set_verbose = True diff --git a/litellm/tests/test_client.py b/litellm/tests/test_client.py index 06850ea187..f9399d42cf 100644 --- a/litellm/tests/test_client.py +++ b/litellm/tests/test_client.py @@ -2,27 +2,19 @@ import sys, os import traceback sys.path.append('..') # Adds the parent directory to the system path import main -from main import litellm_client -client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True) -completion = client.completion -embedding = client.embedding +from main import embedding, completion +main.success_callback = ["posthog"] +main.failure_callback = ["slack", "sentry", "posthog"] -main.set_verbose = True +# main.set_verbose = True def logger_fn(model_call_object: dict): - print(f"model call details: {model_call_object}") + # print(f"model call details: {model_call_object}") + pass user_message = "Hello, how are you?" messages = [{ "content": user_message,"role": "user"}] -# test on openai completion call -try: - response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) -except: - print(f"error occurred: {traceback.format_exc()}") - pass - - # test on openai completion call try: response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) diff --git a/litellm/tests/test_model_fallback.py b/litellm/tests/test_model_fallback.py new file mode 100644 index 0000000000..cdb4bffa22 --- /dev/null +++ b/litellm/tests/test_model_fallback.py @@ -0,0 +1,25 @@ +import sys, os +import traceback +sys.path.append('..') # Adds the parent directory to the system path +import main +from main import embedding, completion +main.success_callback = ["posthog"] +main.failure_callback = ["slack", "sentry", "posthog"] + +main.set_verbose = True + +model_fallback_list = ["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "claude-instant-1", "gpt-3.5-turbo"] + +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] + +# for _ in range(10): +for model in model_fallback_list: + try: + response = completion(model=model, messages=messages) + print(response) + if response != None: + break + except: + print(f"error occurred: {traceback.format_exc()}") + pass diff --git a/setup.py b/setup.py index aed2c27472..54ee487134 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='litellm', - version='0.1.2', + version='0.1.203', description='Library to easily interface with LLM API providers', author='BerriAI', packages=[