diff --git a/build/lib/litellm/__init__.py b/build/lib/litellm/__init__.py
deleted file mode 100644
index 191a8ed26..000000000
--- a/build/lib/litellm/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-success_callback = []
-failure_callback = []
-set_verbose=False
-telemetry=True
-max_tokens = 256 # OpenAI Defaults
-retry = True # control tenacity retries.
-####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
-api_base = None
-headers = None
-####### COMPLETION MODELS ###################
-open_ai_chat_completion_models = [
-    "gpt-4",
-    "gpt-4-0613",
-    "gpt-4-32k",
-    "gpt-4-32k-0613",
-    #################
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-16k",
-    "gpt-3.5-turbo-0613",
-    "gpt-3.5-turbo-16k-0613",
-    'gpt-3.5-turbo',
-    'gpt-3.5-turbo-16k-0613',
-    'gpt-3.5-turbo-16k'
-]
-open_ai_text_completion_models = [
-    'text-davinci-003'
-]
-
-cohere_models = [
-    'command-nightly',
-]
-
-anthropic_models = [
-    "claude-2",
-    "claude-instant-1"
-]
-
-model_list = open_ai_chat_completion_models + open_ai_text_completion_models + cohere_models + anthropic_models
-
-####### EMBEDDING MODELS ###################
-open_ai_embedding_models = [
-    'text-embedding-ada-002'
-]
-from .timeout import timeout
-from .utils import client, logging, exception_type # Import all the symbols from main.py
-from .main import * # Import all the symbols from main.py
-from .integrations import *
\ No newline at end of file
diff --git a/build/lib/litellm/integrations/__init__.py b/build/lib/litellm/integrations/__init__.py
deleted file mode 100644
index b9742821a..000000000
--- a/build/lib/litellm/integrations/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from . import *
\ No newline at end of file
diff --git a/build/lib/litellm/integrations/helicone.py b/build/lib/litellm/integrations/helicone.py
deleted file mode 100644
index 6b3d61965..000000000
--- a/build/lib/litellm/integrations/helicone.py
+++ /dev/null
@@ -1,73 +0,0 @@
-#### What this does ####
-# On success, logs events to Helicone
-import dotenv, os
-import requests
-from anthropic import HUMAN_PROMPT, AI_PROMPT
-dotenv.load_dotenv() # Loading env variables using dotenv
-import traceback
-class HeliconeLogger:
-    # Class variables or attributes
-    helicone_model_list = ["gpt", "claude"]
-    def __init__(self):
-        # Instance variables
-        self.provider_url = "https://api.openai.com/v1"
-        self.key = os.getenv('HELICONE_API_KEY')
-
-    def claude_mapping(self, model, messages, response_obj):
-        prompt = f"{HUMAN_PROMPT}"
-        for message in messages:
-            if "role" in message:
-                if message["role"] == "user":
-                    prompt += f"{HUMAN_PROMPT}{message['content']}"
-                else:
-                    prompt += f"{AI_PROMPT}{message['content']}"
-            else:
-                prompt += f"{HUMAN_PROMPT}{message['content']}"
-        prompt += f"{AI_PROMPT}"
-        claude_provider_request = {"model": model, "prompt": prompt}
-
-        claude_response_obj = {"completion": response_obj['choices'][0]['message']['content'], "model": model, "stop_reason": "stop_sequence"}
-
-        return claude_provider_request, claude_response_obj
-
-    def log_success(self, model, messages, response_obj, start_time, end_time, print_verbose):
-        # Method definition
-        try:
-            print_verbose(f"Helicone Logging - Enters logging function for model {model}")
-            model = model if any(accepted_model in model for accepted_model in self.helicone_model_list) else "gpt-3.5-turbo"
-            provider_request = {"model": model, "messages": messages}
-
-            if "claude" in model:
-                provider_request, response_obj = self.claude_mapping(model=model, messages=messages, response_obj=response_obj)
-
-            providerResponse = {
-                "json": response_obj,
-                "headers": {"openai-version": "2020-10-01"},
-                "status": 200
-            }
-
-            # Code to be executed
-            url = "https://api.hconeai.com/oai/v1/log"
-            headers = {
-                'Authorization': f'Bearer {self.key}',
-                'Content-Type': 'application/json'
-            }
-            start_time_seconds = int(start_time.timestamp())
-            start_time_milliseconds = int((start_time.timestamp() - start_time_seconds) * 1000)
-            end_time_seconds = int(end_time.timestamp())
-            end_time_milliseconds = int((end_time.timestamp() - end_time_seconds) * 1000)
-            data = {
-                "providerRequest": {"url": self.provider_url, "json": provider_request, "meta": {"Helicone-Auth": f"Bearer {self.key}"}},
-                "providerResponse": providerResponse,
-                "timing": {"startTime": {"seconds": start_time_seconds, "milliseconds": start_time_milliseconds}, "endTime": {"seconds": end_time_seconds, "milliseconds": end_time_milliseconds}} # {"seconds": .., "milliseconds": ..}
-            }
-            response = requests.post(url, headers=headers, json=data)
-            if response.status_code == 200:
-                print_verbose("Helicone Logging - Success!")
-            else:
-                print_verbose(f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}")
-                print_verbose(f"Helicone Logging - Error {response.text}")
-        except:
-            # traceback.print_exc()
-            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
-            pass
\ No newline at end of file
diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py
deleted file mode 100644
index f35af8013..000000000
--- a/build/lib/litellm/main.py
+++ /dev/null
@@ -1,315 +0,0 @@
-import os, openai, cohere, replicate, sys
-from typing import Any
-from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-import traceback
-from functools import partial
-import dotenv
-import traceback
-import litellm
-from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
-import random
-import asyncio
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-) # for exponential backoff
-####### ENVIRONMENT VARIABLES ###################
-dotenv.load_dotenv() # Loading env variables using dotenv
-
-def get_optional_params(
-    # 12 optional params
-    functions = [],
-    function_call = "",
-    temperature = 1,
-    top_p = 1,
-    n = 1,
-    stream = False,
-    stop = None,
-    max_tokens = float('inf'),
-    presence_penalty = 0,
-    frequency_penalty = 0,
-    logit_bias = {},
-    user = "",
-    deployment_id = None
-):
-    optional_params = {}
-    if functions != []:
-        optional_params["functions"] = functions
-    if function_call != "":
-        optional_params["function_call"] = function_call
-    if temperature != 1:
-        optional_params["temperature"] = temperature
-    if top_p != 1:
-        optional_params["top_p"] = top_p
-    if n != 1:
-        optional_params["n"] = n
-    if stream:
-        optional_params["stream"] = stream
-    if stop != None:
-        optional_params["stop"] = stop
-    if max_tokens != float('inf'):
-        optional_params["max_tokens"] = max_tokens
-    if presence_penalty != 0:
-        optional_params["presence_penalty"] = presence_penalty
-    if frequency_penalty != 0:
-        optional_params["frequency_penalty"] = frequency_penalty
-    if logit_bias != {}:
-        optional_params["logit_bias"] = logit_bias
-    if user != "":
-        optional_params["user"] = user
-    if deployment_id != None:
-        optional_params["deployment_id"] = user
-    return optional_params
-
-####### COMPLETION ENDPOINTS ################
-#############################################
-async def acompletion(*args, **kwargs):
-    loop = asyncio.get_event_loop()
-
-    # Use a partial function to pass your keyword arguments
-    func = partial(completion, *args, **kwargs)
-
-    # Call the synchronous function using run_in_executor
-    return await loop.run_in_executor(None, func)
-
-@client
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
-def completion(
-    model, messages, # required params
-    # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
-    functions=[], function_call="", # optional params
-    temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
-    presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
-    # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False
-    ):
-    try:
-        # check if user passed in any of the OpenAI optional params
-        optional_params = get_optional_params(
-            functions=functions, function_call=function_call,
-            temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
-            presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id
-        )
-        if azure == True:
-            # azure configs
-            openai.api_type = "azure"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else os.environ.get("AZURE_API_BASE")
-            openai.api_version = os.environ.get("AZURE_API_VERSION")
-            openai.api_key = api_key if api_key is not None else os.environ.get("AZURE_API_KEY")
-            ## LOGGING
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.ChatCompletion.create(
-                    engine=model,
-                    messages = messages,
-                    headers = litellm.headers,
-                    **optional_params,
-                )
-            else:
-                response = openai.ChatCompletion.create(
-                    engine=model,
-                    messages = messages,
-                    **optional_params
-                )
-        elif model in litellm.open_ai_chat_completion_models:
-            openai.api_type = "openai"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = api_key if api_key is not None else os.environ.get("OPENAI_API_KEY")
-            ## LOGGING
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.ChatCompletion.create(
-                    model=model,
-                    messages = messages,
-                    headers = litellm.headers,
-                    **optional_params
-                )
-            else:
-                response = openai.ChatCompletion.create(
-                    model=model,
-                    messages = messages,
-                    **optional_params
-                )
-        elif model in litellm.open_ai_text_completion_models:
-            openai.api_type = "openai"
-            openai.api_base = litellm.api_base if litellm.api_base is not None else "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = api_key if api_key is not None else os.environ.get("OPENAI_API_KEY")
-            prompt = " ".join([message["content"] for message in messages])
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            if litellm.headers:
-                response = openai.Completion.create(
-                    model=model,
-                    prompt = prompt,
-                    headers = litellm.headers,
-                )
-            else:
-                response = openai.Completion.create(
-                    model=model,
-                    prompt = prompt
-                )
-        elif "replicate" in model:
-            # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
-            # checking in case user set it to REPLICATE_API_KEY instead
-            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
-                replicate_api_token = os.environ.get("REPLICATE_API_KEY")
-                os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
-            elif api_key:
-                os.environ["REPLICATE_API_TOKEN"] = api_key
-            prompt = " ".join([message["content"] for message in messages])
-            input = {"prompt": prompt}
-            if max_tokens != float('inf'):
-                input["max_length"] = max_tokens # for t5 models
-                input["max_new_tokens"] = max_tokens # for llama2 models
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            output = replicate.run(
-                model,
-                input=input)
-            response = ""
-            for item in output:
-                response += item
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            response = new_response
-        elif model in litellm.anthropic_models:
-            #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
-            if api_key:
-                os.environ["ANTHROPIC_API_KEY"] = api_key
-            prompt = f"{HUMAN_PROMPT}"
-            for message in messages:
-                if "role" in message:
-                    if message["role"] == "user":
-                        prompt += f"{HUMAN_PROMPT}{message['content']}"
-                    else:
-                        prompt += f"{AI_PROMPT}{message['content']}"
-                else:
-                    prompt += f"{HUMAN_PROMPT}{message['content']}"
-            prompt += f"{AI_PROMPT}"
-            anthropic = Anthropic()
-            # check if user passed in max_tokens != float('inf')
-            if max_tokens != float('inf'):
-                max_tokens_to_sample = max_tokens
-            else:
-                max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            completion = anthropic.completions.create(
-                model=model,
-                prompt=prompt,
-                max_tokens_to_sample=max_tokens_to_sample
-            )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": completion.completion,
-                            "role": "assistant"
-                        }
-                    }
-                ]
-            }
-            print_verbose(f"new response: {new_response}")
-            response = new_response
-        elif model in litellm.cohere_models:
-            cohere_key = api_key if api_key is not None else os.environ.get("COHERE_API_KEY")
-            co = cohere.Client(cohere_key)
-            prompt = " ".join([message["content"] for message in messages])
-            ## LOGGING
-            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
-            ## COMPLETION CALL
-            response = co.generate(
-                model=model,
-                prompt = prompt
-            )
-            new_response = {
-                "choices": [
-                    {
-                        "finish_reason": "stop",
-                        "index": 0,
-                        "message": {
-                            "content": response[0].text,
-                            "role": "assistant"
-                        }
-                    }
-                ],
-            }
-            response = new_response
-        else:
-            logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
-            args = locals()
-            raise ValueError(f"No valid completion model args passed in - {args}")
-        return response
-    except Exception as e:
-        # log the original exception
-        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
-        ## Map to OpenAI Exception
-        raise exception_type(model=model, original_exception=e)
-
-### EMBEDDING ENDPOINTS ####################
-@client
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
-def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
-    try:
-        response = None
-        if azure == True:
-            # azure configs
-            openai.api_type = "azure"
-            openai.api_base = os.environ.get("AZURE_API_BASE")
-            openai.api_version = os.environ.get("AZURE_API_VERSION")
-            openai.api_key = os.environ.get("AZURE_API_KEY")
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            ## EMBEDDING CALL
-            response = openai.Embedding.create(input=input, engine=model)
-            print_verbose(f"response_value: {str(response)[:50]}")
-        elif model in litellm.open_ai_embedding_models:
-            openai.api_type = "openai"
-            openai.api_base = "https://api.openai.com/v1"
-            openai.api_version = None
-            openai.api_key = os.environ.get("OPENAI_API_KEY")
-            ## LOGGING
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            ## EMBEDDING CALL
-            response = openai.Embedding.create(input=input, model=model)
-            print_verbose(f"response_value: {str(response)[:50]}")
-        else:
-            logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
-            args = locals()
-            raise ValueError(f"No valid embedding model args passed in - {args}")
-
-        return response
-    except Exception as e:
-        # log the original exception
-        logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
-        ## Map to OpenAI Exception
-        raise exception_type(model=model, original_exception=e)
-####### HELPER FUNCTIONS ################
-## Set verbose to true -> ```litellm.set_verbose = True```
-def print_verbose(print_statement):
-    if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
-        if random.random() <= 0.3:
-            print("Get help - https://discord.com/invite/wuPM9dRgDw")
-
diff --git a/build/lib/litellm/timeout.py b/build/lib/litellm/timeout.py
deleted file mode 100644
index 37bbbffc1..000000000
--- a/build/lib/litellm/timeout.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Module containing "timeout" decorator for sync and async callables.
-"""
-
-import asyncio
-
-from concurrent import futures
-from inspect import iscoroutinefunction
-from functools import wraps
-from threading import Thread
-from openai.error import Timeout
-
-
-def timeout(
-    timeout_duration: float = None, exception_to_raise = Timeout
-):
-    """
-    Wraps a function to raise the specified exception if execution time
-    is greater than the specified timeout.
-
-    Works with both synchronous and asynchronous callables, but with synchronous ones will introduce
-    some overhead due to the backend use of threads and asyncio.
-
-    :param float timeout_duration: Timeout duration in seconds. If none callable won't time out.
-    :param OpenAIError exception_to_raise: Exception to raise when the callable times out.
-        Defaults to TimeoutError.
-    :return: The decorated function.
-    :rtype: callable
-    """
-
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            async def async_func():
-                return func(*args, **kwargs)
-
-            thread = _LoopWrapper()
-            thread.start()
-            future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
-            local_timeout_duration = timeout_duration
-            if "force_timeout" in kwargs:
-                local_timeout_duration = kwargs["force_timeout"]
-            try:
-                result = future.result(timeout=local_timeout_duration)
-            except futures.TimeoutError:
-                thread.stop_loop()
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
-            thread.stop_loop()
-            return result
-
-        @wraps(func)
-        async def async_wrapper(*args, **kwargs):
-            local_timeout_duration = timeout_duration
-            if "force_timeout" in kwargs:
-                local_timeout_duration = kwargs["force_timeout"]
-            try:
-                value = await asyncio.wait_for(
-                    func(*args, **kwargs), timeout=timeout_duration
-                )
-                return value
-            except asyncio.TimeoutError:
-                raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).")
-
-        if iscoroutinefunction(func):
-            return async_wrapper
-        return wrapper
-
-    return decorator
-
-
-class _LoopWrapper(Thread):
-    def __init__(self):
-        super().__init__(daemon=True)
-        self.loop = asyncio.new_event_loop()
-
-    def run(self) -> None:
-        self.loop.run_forever()
-        self.loop.call_soon_threadsafe(self.loop.close)
-
-    def stop_loop(self):
-        for task in asyncio.all_tasks(self.loop):
-            task.cancel()
-        self.loop.call_soon_threadsafe(self.loop.stop)
\ No newline at end of file
diff --git a/build/lib/litellm/utils.py b/build/lib/litellm/utils.py
deleted file mode 100644
index b0050226b..000000000
--- a/build/lib/litellm/utils.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import dotenv, json, traceback, threading
-import subprocess, os
-import litellm, openai
-import random, uuid, requests
-import datetime
-from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
-####### ENVIRONMENT VARIABLES ###################
-dotenv.load_dotenv() # Loading env variables using dotenv
-sentry_sdk_instance = None
-capture_exception = None
-add_breadcrumb = None
-posthog = None
-slack_app = None
-alerts_channel = None
-heliconeLogger = None
-callback_list = []
-user_logger_fn = None
-additional_details = {}
-
-def print_verbose(print_statement):
-    if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
-        if random.random() <= 0.3:
-            print("Get help - https://discord.com/invite/wuPM9dRgDw")
-
-####### LOGGING ###################
-#Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
-    try:
-        model_call_details = {}
-        model_call_details["model"] = model
-        model_call_details["azure"] = azure
-        # log exception details
-        if exception:
-            model_call_details["original_exception"] = exception
-
-        if litellm.telemetry:
-            safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.
-
-        model_call_details["input"] = input
-        # log additional call details -> api key, etc.
-        if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
-            model_call_details["api_type"] = openai.api_type
-            model_call_details["api_base"] = openai.api_base
-            model_call_details["api_version"] = openai.api_version
-            model_call_details["api_key"] = openai.api_key
-        elif "replicate" in model:
-            model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
-        elif model in litellm.anthropic_models:
-            model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
-        elif model in litellm.cohere_models:
-            model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-        model_call_details["additional_args"] = additional_args
-        ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
-        print_verbose(f"Basic model call details: {model_call_details}")
-        if logger_fn and callable(logger_fn):
-            try:
-                logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
-            except:
-                print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
-    except:
-        traceback.print_exc()
-        pass
-
-####### CLIENT ###################
-# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
-def client(original_function):
-    def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
-        try:
-            global callback_list, add_breadcrumb
-            if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
-                callback_list = list(set(litellm.success_callback + litellm.failure_callback))
-                set_callbacks(callback_list=callback_list,)
-            if add_breadcrumb:
-                add_breadcrumb(
-                    category="litellm.llm_call",
-                    message=f"Positional Args: {args}, Keyword Args: {kwargs}",
-                    level="info",
-                )
-        except: # DO NOT BLOCK running the function because of this
-            print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
-            pass
-
-    def wrapper(*args, **kwargs):
-        try:
-            function_setup(args, kwargs)
-            ## MODEL CALL
-            start_time = datetime.datetime.now()
-            result = original_function(*args, **kwargs)
-            end_time = datetime.datetime.now()
-            ## LOG SUCCESS
-            my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
-            my_thread.start()
-            return result
-        except Exception as e:
-            traceback_exception = traceback.format_exc()
-            my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
-            my_thread.start()
-            raise e
-    return wrapper
-
-####### HELPER FUNCTIONS ################
-def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
-    try:
-        for callback in callback_list:
-            if callback == "sentry":
-                try:
-                    import sentry_sdk
-                except ImportError:
-                    print_verbose("Package 'sentry_sdk' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
-                    import sentry_sdk
-                sentry_sdk_instance = sentry_sdk
-                sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
-                sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
-                capture_exception = sentry_sdk_instance.capture_exception
-                add_breadcrumb = sentry_sdk_instance.add_breadcrumb
-            elif callback == "posthog":
-                try:
-                    from posthog import Posthog
-                except ImportError:
-                    print_verbose("Package 'posthog' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
-                    from posthog import Posthog
-                posthog = Posthog(
-                    project_api_key=os.environ.get("POSTHOG_API_KEY"),
-                    host=os.environ.get("POSTHOG_API_URL"))
-            elif callback == "slack":
-                try:
-                    from slack_bolt import App
-                except ImportError:
-                    print_verbose("Package 'slack_bolt' is missing. Installing it...")
-                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
-                    from slack_bolt import App
-                slack_app = App(
-                    token=os.environ.get("SLACK_API_TOKEN"),
-                    signing_secret=os.environ.get("SLACK_API_SECRET")
-                )
-                alerts_channel = os.environ["SLACK_API_CHANNEL"]
-                print_verbose(f"Initialized Slack App: {slack_app}")
-            elif callback == "helicone":
-                from .integrations.helicone import HeliconeLogger
-
-                heliconeLogger = HeliconeLogger()
-    except:
-        pass
-
-
-def handle_failure(exception, traceback_exception, args, kwargs):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
-    try:
-        print_verbose(f"handle_failure args: {args}")
-        print_verbose(f"handle_failure kwargs: {kwargs}")
-
-        success_handler = additional_details.pop("success_handler", None)
-        failure_handler = additional_details.pop("failure_handler", None)
-
-        additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
-        print_verbose(f"self.failure_callback: {litellm.failure_callback}")
-
-        print_verbose(f"additional_details: {additional_details}")
-        for callback in litellm.failure_callback:
-            try:
-                if callback == "slack":
-                    slack_msg = ""
-                    if len(kwargs) > 0:
-                        for key in kwargs:
-                            slack_msg += f"{key}: {kwargs[key]}\n"
-                    if len(args) > 0:
-                        for i, arg in enumerate(args):
-                            slack_msg += f"LiteLLM_Args_{str(i)}: {arg}"
-                    for detail in additional_details:
-                        slack_msg += f"{detail}: {additional_details[detail]}\n"
-                    slack_msg += f"Traceback: {traceback_exception}"
-                    slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
-                elif callback == "sentry":
-                    capture_exception(exception)
-                elif callback == "posthog":
-                    print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
-                    ph_obj = {}
-                    if len(kwargs) > 0:
-                        ph_obj = kwargs
-                    if len(args) > 0:
-                        for i, arg in enumerate(args):
-                            ph_obj["litellm_args_" + str(i)] = arg
-                    for detail in additional_details:
-                        ph_obj[detail] = additional_details[detail]
-                    event_name = additional_details["Event_Name"]
-                    print_verbose(f"ph_obj: {ph_obj}")
-                    print_verbose(f"PostHog Event Name: {event_name}")
-                    if "user_id" in additional_details:
-                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
-                    else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
-                        unique_id = str(uuid.uuid4())
-                        posthog.capture(unique_id, event_name)
-                    print_verbose(f"successfully logged to PostHog!")
-            except:
-                print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
-                pass
-
-        if failure_handler and callable(failure_handler):
-            call_details = {
-                "exception": exception,
-                "additional_details": additional_details
-            }
-            failure_handler(call_details)
-        pass
-    except:
-        pass
-
-def handle_success(args, kwargs, result, start_time, end_time):
-    global heliconeLogger
-    try:
-        success_handler = additional_details.pop("success_handler", None)
-        failure_handler = additional_details.pop("failure_handler", None)
-        additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
-        for callback in litellm.success_callback:
-            try:
-                if callback == "posthog":
-                    ph_obj = {}
-                    for detail in additional_details:
-                        ph_obj[detail] = additional_details[detail]
-                    event_name = additional_details["Event_Name"]
-                    if "user_id" in additional_details:
-                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
-                    else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
-                        unique_id = str(uuid.uuid4())
-                        posthog.capture(unique_id, event_name, ph_obj)
-                    pass
-                elif callback == "slack":
-                    slack_msg = ""
-                    for detail in additional_details:
-                        slack_msg += f"{detail}: {additional_details[detail]}\n"
-                    slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
-                elif callback == "helicone":
-                    print_verbose("reaches helicone for logging!")
-                    model = args[0] if len(args) > 0 else kwargs["model"]
-                    messages = args[1] if len(args) > 1 else kwargs["messages"]
-                    heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
-            except:
-                print_verbose(f"Success Callback Error - {traceback.format_exc()}")
-                pass
-
-        if success_handler and callable(success_handler):
-            success_handler(args, kwargs)
-        pass
-    except:
-        print_verbose(f"Success Callback Error - {traceback.format_exc()}")
-        pass
-
-
-def exception_type(model, original_exception):
-    try:
-        if isinstance(original_exception, OpenAIError):
-            # Handle the OpenAIError
-            raise original_exception
-        elif model:
-            error_str = str(original_exception)
-            if isinstance(original_exception, BaseException):
-                exception_type = type(original_exception).__name__
-            else:
-                exception_type = ""
-            if "claude" in model: #one of the anthropics
-                if "status_code" in original_exception:
-                    print_verbose(f"status_code: {original_exception.status_code}")
-                    if original_exception.status_code == 401:
-                        raise AuthenticationError(f"AnthropicException - {original_exception.message}")
-                    elif original_exception.status_code == 400:
-                        raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
-                    elif original_exception.status_code == 429:
-                        raise RateLimitError(f"AnthropicException - {original_exception.message}")
-            elif "replicate" in model:
-                if "Incorrect authentication token" in error_str:
-                    raise AuthenticationError(f"ReplicateException - {error_str}")
-                elif exception_type == "ModelError":
-                    raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
-                elif "Request was throttled" in error_str:
-                    raise RateLimitError(f"ReplicateException - {error_str}")
-                elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
-                    raise ServiceUnavailableError(f"ReplicateException - {error_str}")
-            elif model == "command-nightly": #Cohere
-                if "invalid api token" in error_str or "No API key provided." in error_str:
-                    raise AuthenticationError(f"CohereException - {error_str}")
-                elif "too many tokens" in error_str:
-                    raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
-                elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
-                    raise RateLimitError(f"CohereException - {original_exception.message}")
-            raise original_exception # base case - return the original exception
-        else:
-            raise original_exception
-    except:
-        raise original_exception
-
-def safe_crash_reporting(model=None, exception=None, azure=None):
-    data = {
-        "model": model,
-        "exception": str(exception),
-        "azure": azure
-    }
-    threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
-
-def litellm_telemetry(data):
-    # Load or generate the UUID
-    uuid_file = 'litellm_uuid.txt'
-    try:
-        # Try to open the file and load the UUID
-        with open(uuid_file, 'r') as file:
-            uuid_value = file.read()
-            if uuid_value:
-                uuid_value = uuid_value.strip()
-            else:
-                raise FileNotFoundError
-    except FileNotFoundError:
-        # Generate a new UUID if the file doesn't exist or is empty
-        new_uuid = uuid.uuid4()
-        uuid_value = str(new_uuid)
-        with open(uuid_file, 'w') as file:
-            file.write(uuid_value)
-
-    # Prepare the data to send to localhost:3000
-    payload = {
-        'uuid': uuid_value,
-        'data': data
-    }
-    print_verbose(f"payload: {payload}")
-    try:
-        # Make the POST request to localhost:3000
-        response = requests.post('https://litellm.berri.ai/logging', json=payload)
-        response.raise_for_status() # Raise an exception for HTTP errors
-    except requests.exceptions.RequestException as e:
-        # Handle any errors in the request
-        pass
\ No newline at end of file
diff --git a/dist/litellm-0.1.229-py3-none-any.whl b/dist/litellm-0.1.229-py3-none-any.whl
deleted file mode 100644
index 5d4c7ac2c..000000000
Binary files a/dist/litellm-0.1.229-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.229.tar.gz b/dist/litellm-0.1.229.tar.gz
deleted file mode 100644
index a157ac869..000000000
Binary files a/dist/litellm-0.1.229.tar.gz and /dev/null differ
diff --git a/dist/litellm-0.1.2291-py3-none-any.whl b/dist/litellm-0.1.2291-py3-none-any.whl
deleted file mode 100644
index b9128dbd0..000000000
Binary files a/dist/litellm-0.1.2291-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.2291.tar.gz b/dist/litellm-0.1.2291.tar.gz
deleted file mode 100644
index 1a286f7bf..000000000
Binary files a/dist/litellm-0.1.2291.tar.gz and /dev/null differ
diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO
deleted file mode 100644
index f01915a43..000000000
--- a/litellm.egg-info/PKG-INFO
+++ /dev/null
@@ -1,6 +0,0 @@
-Metadata-Version: 2.1
-Name: litellm
-Version: 0.1.2291
-Summary: Library to easily interface with LLM API providers
-Author: BerriAI
-License-File: LICENSE
diff --git a/litellm.egg-info/SOURCES.txt b/litellm.egg-info/SOURCES.txt
deleted file mode 100644
index 88f47e84f..000000000
--- a/litellm.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-LICENSE
-README.md
-pyproject.toml
-setup.py
-litellm/__init__.py
-litellm/main.py
-litellm/timeout.py
-litellm/utils.py
-litellm.egg-info/PKG-INFO
-litellm.egg-info/SOURCES.txt
-litellm.egg-info/dependency_links.txt
-litellm.egg-info/requires.txt
-litellm.egg-info/top_level.txt
-litellm/integrations/__init__.py
-litellm/integrations/helicone.py
\ No newline at end of file
diff --git a/litellm.egg-info/dependency_links.txt b/litellm.egg-info/dependency_links.txt
deleted file mode 100644
index 8b1378917..000000000
--- a/litellm.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/litellm.egg-info/requires.txt b/litellm.egg-info/requires.txt
deleted file mode 100644
index b59e88b89..000000000
--- a/litellm.egg-info/requires.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-openai
-cohere
-pytest
-anthropic
-replicate
-python-dotenv
-openai[datalib]
-tenacity
diff --git a/litellm.egg-info/top_level.txt b/litellm.egg-info/top_level.txt
deleted file mode 100644
index 8e637fbf5..000000000
--- a/litellm.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-litellm
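
Note: with the stale build/, dist/, and litellm.egg-info/ copies removed above, one way to keep future builds from shipping them again is to exclude them at packaging time. The snippet below is only an illustrative sketch, not the project's actual configuration: the repository's real setup.py/pyproject.toml is not part of this diff, and the metadata values here are simply copied from the deleted PKG-INFO and requires.txt.

    # setup.py (hypothetical sketch -- not the repository's actual file)
    from setuptools import setup, find_packages

    setup(
        name="litellm",
        version="0.1.2291",  # version taken from the deleted PKG-INFO
        description="Library to easily interface with LLM API providers",
        author="BerriAI",
        # Ship only the real package; never pick up stale build/ or dist/ copies.
        packages=find_packages(exclude=["build", "build.*", "dist", "dist.*"]),
        install_requires=[
            "openai",
            "cohere",
            "anthropic",
            "replicate",
            "python-dotenv",
            "tenacity",
        ],
    )

Adding build/, dist/, and *.egg-info/ to .gitignore would likewise keep these generated artifacts from being committed again.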