diff --git a/build/lib/litellm/__init__.py b/build/lib/litellm/__init__.py index fd66e12bf3..73257a8bce 100644 --- a/build/lib/litellm/__init__.py +++ b/build/lib/litellm/__init__.py @@ -1,2 +1,31 @@ -__version__ = "1.0.0" -from .main import * # Import all the symbols from main.py \ No newline at end of file +success_callback = [] +failure_callback = [] +set_verbose=False +telemetry=True +####### COMPLETION MODELS ################### +open_ai_chat_completion_models = [ + 'gpt-3.5-turbo', + 'gpt-4' +] +open_ai_text_completion_models = [ + 'text-davinci-003' +] + +cohere_models = [ + 'command-nightly', +] + +anthropic_models = [ + "claude-2", + "claude-instant-1" +] + +####### EMBEDDING MODELS ################### +open_ai_embedding_models = [ + 'text-embedding-ada-002' +] + +from .timeout import timeout +from .utils import client, logging, exception_type # Import all the symbols from main.py +from .main import * # Import all the symbols from main.py + diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py index d4fc60053a..a90c9bf81f 100644 --- a/build/lib/litellm/main.py +++ b/build/lib/litellm/main.py @@ -1,49 +1,77 @@ import os, openai, cohere, replicate, sys from typing import Any -from func_timeout import func_set_timeout, FunctionTimedOut from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT -import json import traceback -import threading import dotenv import traceback -import subprocess +import litellm +from litellm import client, logging, exception_type, timeout, success_callback, failure_callback +import random ####### ENVIRONMENT VARIABLES ################### -# Loading env variables using dotenv -dotenv.load_dotenv() -set_verbose = False - -####### COMPLETION MODELS ################### -open_ai_chat_completion_models = [ - 'gpt-3.5-turbo', - 'gpt-4' -] -open_ai_text_completion_models = [ - 'text-davinci-003' -] - -cohere_models = [ - 'command-nightly', -] - -anthropic_models = [ - "claude-2", - "claude-instant-1" -] - -####### EMBEDDING MODELS ################### -open_ai_embedding_models = [ - 'text-embedding-ada-002' -] - -############################################# +dotenv.load_dotenv() # Loading env variables using dotenv +def get_optional_params( + # 12 optional params + functions = [], + function_call = "", + temperature = 1, + top_p = 1, + n = 1, + stream = False, + stop = None, + max_tokens = float('inf'), + presence_penalty = 0, + frequency_penalty = 0, + logit_bias = {}, + user = "", +): + optional_params = {} + if functions != []: + optional_params["functions"] = functions + if function_call != "": + optional_params["function_call"] = function_call + if temperature != 1: + optional_params["temperature"] = temperature + if top_p != 1: + optional_params["top_p"] = top_p + if n != 1: + optional_params["n"] = n + if stream: + optional_params["stream"] = stream + if stop != None: + optional_params["stop"] = stop + if max_tokens != float('inf'): + optional_params["max_tokens"] = max_tokens + if presence_penalty != 0: + optional_params["presence_penalty"] = presence_penalty + if frequency_penalty != 0: + optional_params["frequency_penalty"] = frequency_penalty + if logit_bias != {}: + optional_params["logit_bias"] = logit_bias + if user != "": + optional_params["user"] = user + return optional_params ####### COMPLETION ENDPOINTS ################ ############################################# -@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. 
Azure) -def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None): +@client +@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout` +def completion( + model, messages, # required params + # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create + functions=[], function_call="", # optional params + temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'), + presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", + # Optional liteLLM function params + *, force_timeout=60, azure=False, logger_fn=None, verbose=False + ): try: + # check if user passed in any of the OpenAI optional params + optional_params = get_optional_params( + functions=functions, function_call=function_call, + temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, + presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user + ) if azure == True: # azure configs openai.api_type = "azure" @@ -51,21 +79,49 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l openai.api_version = os.environ.get("AZURE_API_VERSION") openai.api_key = os.environ.get("AZURE_API_KEY") ## LOGGING - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) ## COMPLETION CALL response = openai.ChatCompletion.create( engine=model, - messages = messages + messages = messages, + **optional_params ) - elif "replicate" in model: + elif model in litellm.open_ai_chat_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + + ## COMPLETION CALL + response = openai.ChatCompletion.create( + model=model, + messages = messages, + **optional_params + ) + elif model in litellm.open_ai_text_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.Completion.create( + model=model, + prompt = prompt + ) + elif "replicate" in model: # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead - if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): + if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): replicate_api_token = os.environ.get("REPLICATE_API_KEY") os.environ["REPLICATE_API_TOKEN"] = replicate_api_token prompt = " ".join([message["content"] for message in messages]) - input = [{"prompt": prompt}] - if max_tokens: + input = {"prompt": prompt} + if max_tokens != float('inf'): input["max_length"] = max_tokens # for t5 models input["max_new_tokens"] = max_tokens # for llama2 models ## LOGGING @@ -90,7 +146,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l ] } response = new_response - elif model in anthropic_models: + elif model in litellm.anthropic_models: #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY") prompt = f"{HUMAN_PROMPT}" for message 
in messages: @@ -103,9 +159,10 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l prompt += f"{HUMAN_PROMPT}{message['content']}" prompt += f"{AI_PROMPT}" anthropic = Anthropic() - if max_tokens: + # check if user passed in max_tokens != float('inf') + if max_tokens != float('inf'): max_tokens_to_sample = max_tokens - else: + else: max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries ## LOGGING logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) @@ -127,9 +184,9 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l } ] } - print(f"new response: {new_response}") + print_verbose(f"new response: {new_response}") response = new_response - elif model in cohere_models: + elif model in litellm.cohere_models: cohere_key = os.environ.get("COHERE_API_KEY") co = cohere.Client(cohere_key) prompt = " ".join([message["content"] for message in messages]) @@ -146,7 +203,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l "finish_reason": "stop", "index": 0, "message": { - "content": response[0], + "content": response[0].text, "role": "assistant" } } @@ -154,7 +211,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l } response = new_response - elif model in open_ai_chat_completion_models: + elif model in litellm.open_ai_chat_completion_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" openai.api_version = None @@ -166,7 +223,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l model=model, messages = messages ) - elif model in open_ai_text_completion_models: + elif model in litellm.open_ai_text_completion_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" openai.api_version = None @@ -181,249 +238,59 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l ) else: logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + args = locals() + raise ValueError(f"No valid completion model args passed in - {args}") return response except Exception as e: - logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) - raise e + # log the original exception + logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e) + ## Map to OpenAI Exception + raise exception_type(model=model, original_exception=e) ### EMBEDDING ENDPOINTS #################### -@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/ -def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None): - response = None - if azure == True: - # azure configs - openai.api_type = "azure" - openai.api_base = os.environ.get("AZURE_API_BASE") - openai.api_version = os.environ.get("AZURE_API_VERSION") - openai.api_key = os.environ.get("AZURE_API_KEY") - ## LOGGING - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) - ## EMBEDDING CALL - response = openai.Embedding.create(input=input, engine=model) - print_verbose(f"response_value: {str(response)[:50]}") - elif model in open_ai_embedding_models: - openai.api_type = "openai" - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None - openai.api_key = os.environ.get("OPENAI_API_KEY") - ## LOGGING - logging(model=model, 
input=input, azure=azure, logger_fn=logger_fn) - ## EMBEDDING CALL - response = openai.Embedding.create(input=input, model=model) - print_verbose(f"response_value: {str(response)[:50]}") - else: - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) - - return response - - -### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc. -class litellm_client: - def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor - set_verbose = verbose - self.success_callback = success_callback - self.failure_callback = failure_callback - self.logger_fn = None # if user passes in their own logging function - self.callback_list = list(set(self.success_callback + self.failure_callback)) - self.set_callbacks() - - ## COMPLETION CALL - def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any: - try: - self.logger_fn = logger_fn - response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input) - my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread - my_thread.start() - return response - except Exception as e: - args = locals() # get all the param values - self.handle_failure(e, args) - raise e - - ## EMBEDDING CALL - def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any: - try: - self.logger_fn = logger_fn - response = embedding(model, input, azure=azure, logger_fn=self.handle_input) - my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread - my_thread.start() - return response - except Exception as e: - args = locals() # get all the param values - self.handle_failure(e, args) - raise e - - - def set_callbacks(self): #instantiate any external packages - for callback in self.callback_list: # only install what's required - if callback == "sentry": - try: - import sentry_sdk - except ImportError: - print_verbose("Package 'sentry_sdk' is missing. Installing it...") - subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk']) - import sentry_sdk - self.sentry_sdk = sentry_sdk - self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) - self.capture_exception = self.sentry_sdk.capture_exception - self.add_breadcrumb = self.sentry_sdk.add_breadcrumb - elif callback == "posthog": - try: - from posthog import Posthog - except: - print_verbose("Package 'posthog' is missing. Installing it...") - subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog']) - from posthog import Posthog - self.posthog = Posthog( - project_api_key=os.environ.get("POSTHOG_API_KEY"), - host=os.environ.get("POSTHOG_API_URL")) - elif callback == "slack": - try: - from slack_bolt import App - except ImportError: - print_verbose("Package 'slack_bolt' is missing. 
Installing it...") - subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt']) - from slack_bolt import App - self.slack_app = App( - token=os.environ.get("SLACK_API_TOKEN"), - signing_secret=os.environ.get("SLACK_API_SECRET") - ) - self.alerts_channel = os.environ["SLACK_API_CHANNEL"] - - def handle_input(self, model_call_details={}): - if len(model_call_details.keys()) > 0: - model = model_call_details["model"] if "model" in model_call_details else None - if model: - for callback in self.callback_list: - if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration - self.add_breadcrumb( - category=f'{model}', - message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)), - level='info', - ) - if self.logger_fn and callable(self.logger_fn): - self.logger_fn(model_call_details) - pass - - def handle_success(self, model, messages, additional_details): - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - additional_details["litellm_model"] = str(model) - additional_details["litellm_messages"] = str(messages) - for callback in self.success_callback: - try: - if callback == "posthog": - ph_obj = {} - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query" - if "user_id" in additional_details: - self.posthog.capture(additional_details["user_id"], event_name, ph_obj) - else: - self.posthog.capture(event_name, ph_obj) - pass - elif callback == "slack": - slack_msg = "" - if len(additional_details.keys()) > 0: - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Successful call" - self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) - except: - pass - - if success_handler and callable(success_handler): - call_details = { - "model": model, - "messages": messages, - "additional_details": additional_details - } - success_handler(call_details) - pass - - def handle_failure(self, exception, args): - args.pop("self") - additional_details = args.pop("additional_details", {}) - - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - - for callback in self.failure_callback: - try: - if callback == "slack": - slack_msg = "" - for param in args: - slack_msg += f"{param}: {args[param]}\n" - if len(additional_details.keys()) > 0: - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Traceback: {traceback.format_exc()}" - self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) - elif callback == "sentry": - self.capture_exception(exception) - elif callback == "posthog": - if len(additional_details.keys()) > 0: - ph_obj = {} - for param in args: - ph_obj[param] += args[param] - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query" - if "user_id" in additional_details: - self.posthog.capture(additional_details["user_id"], event_name, ph_obj) - else: - self.posthog.capture(event_name, ph_obj) - else: - pass - except: - print(f"got an error calling {callback} - {traceback.format_exc()}") - - if failure_handler and 
callable(failure_handler): - call_details = { - "exception": exception, - "additional_details": additional_details - } - failure_handler(call_details) - pass -####### HELPER FUNCTIONS ################ - -#Logging function -> log the exact model details + what's being sent | Non-Blocking -def logging(model, input, azure=False, additional_args={}, logger_fn=None): +@client +@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout` +def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None): try: - model_call_details = {} - model_call_details["model"] = model - model_call_details["input"] = input - model_call_details["azure"] = azure - model_call_details["additional_args"] = additional_args - if logger_fn and callable(logger_fn): - try: - # log additional call details -> api key, etc. - if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models: - model_call_details["api_type"] = openai.api_type - model_call_details["api_base"] = openai.api_base - model_call_details["api_version"] = openai.api_version - model_call_details["api_key"] = openai.api_key - elif "replicate" in model: - model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") - elif model in anthropic_models: - model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") - elif model in cohere_models: - model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") - - logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object - except: - print_verbose(f"Basic model call details: {model_call_details}") - print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}") - pass - else: - print_verbose(f"Basic model call details: {model_call_details}") - pass - except: - pass - -## Set verbose to true -> ```litellm.verbose = True``` + response = None + if azure == True: + # azure configs + openai.api_type = "azure" + openai.api_base = os.environ.get("AZURE_API_BASE") + openai.api_version = os.environ.get("AZURE_API_VERSION") + openai.api_key = os.environ.get("AZURE_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL + response = openai.Embedding.create(input=input, engine=model) + print_verbose(f"response_value: {str(response)[:50]}") + elif model in litellm.open_ai_embedding_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL + response = openai.Embedding.create(input=input, model=model) + print_verbose(f"response_value: {str(response)[:50]}") + else: + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + args = locals() + raise ValueError(f"No valid embedding model args passed in - {args}") + + return response + except Exception as e: + # log the original exception + logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e) + ## Map to OpenAI Exception + raise exception_type(model=model, original_exception=e) +####### HELPER FUNCTIONS ################ +## Set verbose to true -> ```litellm.set_verbose = True``` def print_verbose(print_statement): - if set_verbose: + if litellm.set_verbose: print(f"LiteLLM: {print_statement}") - print("Get help - 
https://discord.com/invite/wuPM9dRgDw") \ No newline at end of file + if random.random() <= 0.3: + print("Get help - https://discord.com/invite/wuPM9dRgDw") + diff --git a/build/lib/litellm/timeout.py b/build/lib/litellm/timeout.py new file mode 100644 index 0000000000..8cbe650ed6 --- /dev/null +++ b/build/lib/litellm/timeout.py @@ -0,0 +1,80 @@ +""" +Module containing "timeout" decorator for sync and async callables. +""" + +import asyncio + +from concurrent import futures +from inspect import iscoroutinefunction +from functools import wraps +from threading import Thread +from openai.error import Timeout + + +def timeout( + timeout_duration: float = None, exception_to_raise = Timeout +): + """ + Wraps a function to raise the specified exception if execution time + is greater than the specified timeout. + + Works with both synchronous and asynchronous callables, but with synchronous ones will introduce + some overhead due to the backend use of threads and asyncio. + + :param float timeout_duration: Timeout duration in seconds. If none callable won't time out. + :param OpenAIError exception_to_raise: Exception to raise when the callable times out. + Defaults to TimeoutError. + :return: The decorated function. + :rtype: callable + """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + async def async_func(): + return func(*args, **kwargs) + + thread = _LoopWrapper() + thread.start() + future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop) + try: + local_timeout_duration = timeout_duration + if "force_timeout" in kwargs: + local_timeout_duration = kwargs["force_timeout"] + result = future.result(timeout=local_timeout_duration) + except futures.TimeoutError: + thread.stop_loop() + raise exception_to_raise() + thread.stop_loop() + return result + + @wraps(func) + async def async_wrapper(*args, **kwargs): + try: + value = await asyncio.wait_for( + func(*args, **kwargs), timeout=timeout_duration + ) + return value + except asyncio.TimeoutError: + raise exception_to_raise() + + if iscoroutinefunction(func): + return async_wrapper + return wrapper + + return decorator + + +class _LoopWrapper(Thread): + def __init__(self): + super().__init__(daemon=True) + self.loop = asyncio.new_event_loop() + + def run(self) -> None: + self.loop.run_forever() + self.loop.call_soon_threadsafe(self.loop.close) + + def stop_loop(self): + for task in asyncio.all_tasks(self.loop): + task.cancel() + self.loop.call_soon_threadsafe(self.loop.stop) \ No newline at end of file diff --git a/build/lib/litellm/utils.py b/build/lib/litellm/utils.py new file mode 100644 index 0000000000..67a3d6df74 --- /dev/null +++ b/build/lib/litellm/utils.py @@ -0,0 +1,316 @@ +import dotenv, json, traceback, threading +import subprocess, os +import litellm, openai +import random, uuid, requests +from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError +####### ENVIRONMENT VARIABLES ################### +dotenv.load_dotenv() # Loading env variables using dotenv +sentry_sdk_instance = None +capture_exception = None +add_breadcrumb = None +posthog = None +slack_app = None +alerts_channel = None +callback_list = [] +user_logger_fn = None +additional_details = {} + +def print_verbose(print_statement): + if litellm.set_verbose: + print(f"LiteLLM: {print_statement}") + if random.random() <= 0.3: + print("Get help - https://discord.com/invite/wuPM9dRgDw") + +####### LOGGING ################### +#Logging function -> log the exact model details + what's 
being sent | Non-Blocking +def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None): + try: + model_call_details = {} + model_call_details["model"] = model + model_call_details["azure"] = azure + # log exception details + if exception: + model_call_details["original_exception"] = exception + + if litellm.telemetry: + safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`. + + model_call_details["input"] = input + # log additional call details -> api key, etc. + if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models: + model_call_details["api_type"] = openai.api_type + model_call_details["api_base"] = openai.api_base + model_call_details["api_version"] = openai.api_version + model_call_details["api_key"] = openai.api_key + elif "replicate" in model: + model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") + elif model in litellm.anthropic_models: + model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") + elif model in litellm.cohere_models: + model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") + model_call_details["additional_args"] = additional_args + ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs + print_verbose(f"Basic model call details: {model_call_details}") + if logger_fn and callable(logger_fn): + try: + logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object + except: + print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}") + except: + traceback.print_exc() + pass + +####### CLIENT ################### +# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking +def client(original_function): + def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. 
+ try: + global callback_list, add_breadcrumb + if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0: + callback_list = list(set(litellm.success_callback + litellm.failure_callback)) + set_callbacks(callback_list=callback_list) + if add_breadcrumb: + add_breadcrumb( + category="litellm.llm_call", + message=f"Positional Args: {args}, Keyword Args: {kwargs}", + level="info", + ) + except: # DO NOT BLOCK running the function because of this + print_verbose(f"[Non-Blocking] {traceback.format_exc()}") + pass + + def wrapper(*args, **kwargs): + try: + function_setup(args, kwargs) + ## MODEL CALL + result = original_function(*args, **kwargs) + ## LOG SUCCESS + my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread + my_thread.start() + return result + except Exception as e: + traceback_exception = traceback.format_exc() + my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread + my_thread.start() + raise e + return wrapper + +####### HELPER FUNCTIONS ################ +def set_callbacks(callback_list): + global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel + try: + for callback in callback_list: + if callback == "sentry": + try: + import sentry_sdk + except ImportError: + print_verbose("Package 'sentry_sdk' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk']) + import sentry_sdk + sentry_sdk_instance = sentry_sdk + sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) + capture_exception = sentry_sdk_instance.capture_exception + add_breadcrumb = sentry_sdk_instance.add_breadcrumb + elif callback == "posthog": + try: + from posthog import Posthog + except ImportError: + print_verbose("Package 'posthog' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog']) + from posthog import Posthog + posthog = Posthog( + project_api_key=os.environ.get("POSTHOG_API_KEY"), + host=os.environ.get("POSTHOG_API_URL")) + elif callback == "slack": + try: + from slack_bolt import App + except ImportError: + print_verbose("Package 'slack_bolt' is missing. 
Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt']) + from slack_bolt import App + slack_app = App( + token=os.environ.get("SLACK_API_TOKEN"), + signing_secret=os.environ.get("SLACK_API_SECRET") + ) + alerts_channel = os.environ["SLACK_API_CHANNEL"] + print_verbose(f"Initialized Slack App: {slack_app}") + except: + pass + + +def handle_failure(exception, traceback_exception, args, kwargs): + global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel + try: + print_verbose(f"handle_failure args: {args}") + print_verbose(f"handle_failure kwargs: {kwargs}") + + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + + additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query") + print_verbose(f"self.failure_callback: {litellm.failure_callback}") + + print_verbose(f"additional_details: {additional_details}") + for callback in litellm.failure_callback: + try: + if callback == "slack": + slack_msg = "" + if len(kwargs) > 0: + for key in kwargs: + slack_msg += f"{key}: {kwargs[key]}\n" + if len(args) > 0: + for i, arg in enumerate(args): + slack_msg += f"LiteLLM_Args_{str(i)}: {arg}" + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Traceback: {traceback_exception}" + slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg) + elif callback == "sentry": + capture_exception(exception) + elif callback == "posthog": + print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}") + ph_obj = {} + if len(kwargs) > 0: + ph_obj = kwargs + if len(args) > 0: + for i, arg in enumerate(args): + ph_obj["litellm_args_" + str(i)] = arg + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["Event_Name"] + print_verbose(f"ph_obj: {ph_obj}") + print_verbose(f"PostHog Event Name: {event_name}") + if "user_id" in additional_details: + posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python + unique_id = str(uuid.uuid4()) + posthog.capture(unique_id, event_name) + print_verbose(f"successfully logged to PostHog!") + except: + print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}") + pass + + if failure_handler and callable(failure_handler): + call_details = { + "exception": exception, + "additional_details": additional_details + } + failure_handler(call_details) + pass + except: + pass + +def handle_success(*args, **kwargs): + try: + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query") + for callback in litellm.success_callback: + try: + if callback == "posthog": + ph_obj = {} + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["Event_Name"] + if "user_id" in additional_details: + posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python + unique_id = str(uuid.uuid4()) + posthog.capture(unique_id, event_name, ph_obj) + pass + elif callback == 
"slack": + slack_msg = "" + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg) + except: + pass + + if success_handler and callable(success_handler): + success_handler(args, kwargs) + pass + except: + pass + + +def exception_type(model, original_exception): + try: + if isinstance(original_exception, OpenAIError): + # Handle the OpenAIError + raise original_exception + elif model: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" + if "claude" in model: #one of the anthropics + if "status_code" in original_exception: + print_verbose(f"status_code: {original_exception.status_code}") + if original_exception.status_code == 401: + raise AuthenticationError(f"AnthropicException - {original_exception.message}") + elif original_exception.status_code == 400: + raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}") + elif original_exception.status_code == 429: + raise RateLimitError(f"AnthropicException - {original_exception.message}") + elif "replicate" in model: + if "Incorrect authentication token" in error_str: + raise AuthenticationError(f"ReplicateException - {error_str}") + elif exception_type == "ModelError": + raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}") + elif "Request was throttled" in error_str: + raise RateLimitError(f"ReplicateException - {error_str}") + elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side + raise ServiceUnavailableError(f"ReplicateException - {error_str}") + elif model == "command-nightly": #Cohere + if "invalid api token" in error_str or "No API key provided." 
in error_str: + raise AuthenticationError(f"CohereException - {error_str}") + elif "too many tokens" in error_str: + raise InvalidRequestError(f"CohereException - {error_str}", f"{model}") + elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min) + raise RateLimitError(f"CohereException - {original_exception.message}") + raise original_exception # base case - return the original exception + else: + raise original_exception + except: + raise original_exception + +def safe_crash_reporting(model=None, exception=None, azure=None): + data = { + "model": model, + "exception": str(exception), + "azure": azure + } + threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start() + +def litellm_telemetry(data): + # Load or generate the UUID + uuid_file = 'litellm_uuid.txt' + try: + # Try to open the file and load the UUID + with open(uuid_file, 'r') as file: + uuid_value = file.read() + if uuid_value: + uuid_value = uuid_value.strip() + else: + raise FileNotFoundError + except FileNotFoundError: + # Generate a new UUID if the file doesn't exist or is empty + new_uuid = uuid.uuid4() + uuid_value = str(new_uuid) + with open(uuid_file, 'w') as file: + file.write(uuid_value) + + # Prepare the data to send to localhost:3000 + payload = { + 'uuid': uuid_value, + 'data': data + } + print_verbose(f"payload: {payload}") + try: + # Make the POST request to localhost:3000 + response = requests.post('https://litellm.berri.ai/logging', json=payload) + response.raise_for_status() # Raise an exception for HTTP errors + except requests.exceptions.RequestException as e: + # Handle any errors in the request + pass \ No newline at end of file diff --git a/dist/litellm-0.1.2-py3-none-any.whl b/dist/litellm-0.1.2-py3-none-any.whl deleted file mode 100644 index 07cfc44175..0000000000 Binary files a/dist/litellm-0.1.2-py3-none-any.whl and /dev/null differ diff --git a/dist/litellm-0.1.2.tar.gz b/dist/litellm-0.1.2.tar.gz deleted file mode 100644 index 853c7db396..0000000000 Binary files a/dist/litellm-0.1.2.tar.gz and /dev/null differ diff --git a/dist/litellm-0.1.216-py3-none-any.whl b/dist/litellm-0.1.216-py3-none-any.whl new file mode 100644 index 0000000000..94866790a2 Binary files /dev/null and b/dist/litellm-0.1.216-py3-none-any.whl differ diff --git a/dist/litellm-0.1.216.tar.gz b/dist/litellm-0.1.216.tar.gz new file mode 100644 index 0000000000..73c8ec6d26 Binary files /dev/null and b/dist/litellm-0.1.216.tar.gz differ diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO index 021d53b6ac..a9e70364be 100644 --- a/litellm.egg-info/PKG-INFO +++ b/litellm.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: litellm -Version: 0.1.207 +Version: 0.1.216 Summary: Library to easily interface with LLM API providers Author: BerriAI License-File: LICENSE diff --git a/litellm.egg-info/SOURCES.txt b/litellm.egg-info/SOURCES.txt index d7e6535970..9355b612b0 100644 --- a/litellm.egg-info/SOURCES.txt +++ b/litellm.egg-info/SOURCES.txt @@ -1,5 +1,6 @@ LICENSE README.md +pyproject.toml setup.py litellm/__init__.py litellm/main.py diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc index e487c18c97..3dd00dd19d 100644 Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 153207bb97..43f2fe70a7 100644 Binary files 
a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc index 0d9aacf693..fe39afd2e3 100644 Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ diff --git a/litellm/tests/test_client.py b/litellm/tests/test_client.py index 9129b5853c..eb99634015 100644 --- a/litellm/tests/test_client.py +++ b/litellm/tests/test_client.py @@ -57,3 +57,4 @@ def test_good_azure_embedding(): print(f"response: {str(response)[:50]}") except Exception as e: pytest.fail(f"Error occurred: {e}") + diff --git a/litellm/tests/test_no_client.py b/litellm/tests/test_no_client.py new file mode 100644 index 0000000000..79c47d0da7 --- /dev/null +++ b/litellm/tests/test_no_client.py @@ -0,0 +1,23 @@ +#### What this tests #### +# This tests error logging (with custom user functions) for the `completion` + `embedding` endpoints without callbacks (i.e. slack, posthog, etc. not set) +# Requirements: Remove any env keys you have related to slack/posthog/etc. + anthropic api key (cause an exception) + +import sys, os +import traceback +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion + +litellm.set_verbose = True + +model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"] + +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] + +for model in model_fallback_list: + try: + response = embedding(model="text-embedding-ada-002", input=[user_message]) + response = completion(model=model, messages=messages) + except Exception as e: + print(f"error occurred: {traceback.format_exc()}") diff --git a/litellm/utils.py b/litellm/utils.py index e2b4f56554..67a3d6df74 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -234,41 +234,45 @@ def handle_success(*args, **kwargs): def exception_type(model, original_exception): - if isinstance(original_exception, OpenAIError): - # Handle the OpenAIError - raise original_exception - elif model: - error_str = str(original_exception) - if isinstance(original_exception, BaseException): - exception_type = type(original_exception).__name__ + try: + if isinstance(original_exception, OpenAIError): + # Handle the OpenAIError + raise original_exception + elif model: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" + if "claude" in model: #one of the anthropics + if "status_code" in original_exception: + print_verbose(f"status_code: {original_exception.status_code}") + if original_exception.status_code == 401: + raise AuthenticationError(f"AnthropicException - {original_exception.message}") + elif original_exception.status_code == 400: + raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}") + elif original_exception.status_code == 429: + raise RateLimitError(f"AnthropicException - {original_exception.message}") + elif "replicate" in model: + if "Incorrect authentication token" in error_str: + raise AuthenticationError(f"ReplicateException - {error_str}") + elif exception_type == "ModelError": + raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}") + elif "Request was throttled" in error_str: + raise RateLimitError(f"ReplicateException - {error_str}") + elif exception_type == 
"ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side + raise ServiceUnavailableError(f"ReplicateException - {error_str}") + elif model == "command-nightly": #Cohere + if "invalid api token" in error_str or "No API key provided." in error_str: + raise AuthenticationError(f"CohereException - {error_str}") + elif "too many tokens" in error_str: + raise InvalidRequestError(f"CohereException - {error_str}", f"{model}") + elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min) + raise RateLimitError(f"CohereException - {original_exception.message}") + raise original_exception # base case - return the original exception else: - exception_type = "" - if "claude" in model: #one of the anthropics - print_verbose(f"status_code: {original_exception.status_code}") - if original_exception.status_code == 401: - raise AuthenticationError(f"AnthropicException - {original_exception.message}") - elif original_exception.status_code == 400: - raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}") - elif original_exception.status_code == 429: - raise RateLimitError(f"AnthropicException - {original_exception.message}") - elif "replicate" in model: - if "Incorrect authentication token" in error_str: - raise AuthenticationError(f"ReplicateException - {error_str}") - elif exception_type == "ModelError": - raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}") - elif "Request was throttled" in error_str: - raise RateLimitError(f"ReplicateException - {error_str}") - elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side - raise ServiceUnavailableError(f"ReplicateException - {error_str}") - elif model == "command-nightly": #Cohere - if "invalid api token" in error_str or "No API key provided." 
in error_str: - raise AuthenticationError(f"CohereException - {error_str}") - elif "too many tokens" in error_str: - raise InvalidRequestError(f"CohereException - {error_str}", f"{model}") - elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min) - raise RateLimitError(f"CohereException - {original_exception.message}") - raise original_exception # base case - return the original exception - else: + raise original_exception + except: raise original_exception def safe_crash_reporting(model=None, exception=None, azure=None): @@ -277,11 +281,9 @@ def safe_crash_reporting(model=None, exception=None, azure=None): "exception": str(exception), "azure": azure } - print(f"data in crash reporting: {data}") threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start() def litellm_telemetry(data): - print(f"data in in litellm telemetry: {data}") # Load or generate the UUID uuid_file = 'litellm_uuid.txt' try: @@ -290,7 +292,6 @@ def litellm_telemetry(data): uuid_value = file.read() if uuid_value: uuid_value = uuid_value.strip() - print(f"Loaded UUID: {uuid_value}") else: raise FileNotFoundError except FileNotFoundError: @@ -299,7 +300,6 @@ def litellm_telemetry(data): uuid_value = str(new_uuid) with open(uuid_file, 'w') as file: file.write(uuid_value) - print(f"Generated and stored UUID: {uuid_value}") # Prepare the data to send to localhost:3000 payload = { @@ -311,7 +311,6 @@ def litellm_telemetry(data): # Make the POST request to localhost:3000 response = requests.post('https://litellm.berri.ai/logging', json=payload) response.raise_for_status() # Raise an exception for HTTP errors - print('Request successfully sent!') except requests.exceptions.RequestException as e: # Handle any errors in the request - print(f'Error: {e}') \ No newline at end of file + pass \ No newline at end of file diff --git a/setup.py b/setup.py index ff035141ca..0c8a6a7c46 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='litellm', - version='0.1.214', + version='0.1.216', description='Library to easily interface with LLM API providers', author='BerriAI', packages=[
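
Reviewer note — below is a minimal usage sketch of the public surface this patch introduces (`completion`/`embedding`, the module-level callback lists, `set_verbose`, and the OpenAI-style exception mapping). It is an illustration under assumptions, not part of the patch: it presumes litellm 0.1.216 is installed, that provider keys such as OPENAI_API_KEY are exported, and the helper name `my_logger` is invented for the example.

    import litellm
    from litellm import completion, embedding
    from openai.error import AuthenticationError, RateLimitError, Timeout

    litellm.set_verbose = True                     # route print_verbose() output to stdout
    litellm.success_callback = ["posthog"]         # optional; set_callbacks() also wires "slack" and "sentry"
    litellm.failure_callback = ["slack", "sentry"]

    def my_logger(model_call_details):             # any callable that accepts the dict built by logging()
        print(model_call_details["model"], model_call_details.get("input"))

    messages = [{"role": "user", "content": "Hello, how are you?"}]

    # OpenAI chat model; non-default optional params are forwarded via get_optional_params().
    # Errors surface as openai.error types: OpenAI's own exceptions pass through, and
    # Anthropic/Cohere/Replicate errors are mapped by exception_type().
    try:
        response = completion("gpt-3.5-turbo", messages, temperature=0.2, max_tokens=100,
                              force_timeout=30, logger_fn=my_logger)
    except (AuthenticationError, RateLimitError, Timeout) as e:
        print(f"LLM call failed: {e}")

    # Same call signature for the Anthropic and Cohere models registered in __init__.py
    response = completion("claude-instant-1", messages)
    response = completion("command-nightly", messages)

    # Embeddings (OpenAI, or Azure with azure=True)
    vectors = embedding("text-embedding-ada-002", input=["Hello, how are you?"])

Both endpoints are wrapped by the @client decorator (success/failure callbacks run on a background thread via handle_success/handle_failure) and by @timeout(60) from litellm/timeout.py, which raises openai.error.Timeout when a call exceeds its limit; passing force_timeout overrides the 60-second default per call.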