forked from phoenix/litellm-mirror
bug fixes and updates
This commit is contained in:
parent
efe82c6bb6
commit
85f1591413
17 changed files with 646 additions and 330 deletions
|
@ -1,2 +1,31 @@
|
||||||
__version__ = "1.0.0"
|
success_callback = []
|
||||||
|
failure_callback = []
|
||||||
|
set_verbose=False
|
||||||
|
telemetry=True
|
||||||
|
####### COMPLETION MODELS ###################
|
||||||
|
open_ai_chat_completion_models = [
|
||||||
|
'gpt-3.5-turbo',
|
||||||
|
'gpt-4'
|
||||||
|
]
|
||||||
|
open_ai_text_completion_models = [
|
||||||
|
'text-davinci-003'
|
||||||
|
]
|
||||||
|
|
||||||
|
cohere_models = [
|
||||||
|
'command-nightly',
|
||||||
|
]
|
||||||
|
|
||||||
|
anthropic_models = [
|
||||||
|
"claude-2",
|
||||||
|
"claude-instant-1"
|
||||||
|
]
|
||||||
|
|
||||||
|
####### EMBEDDING MODELS ###################
|
||||||
|
open_ai_embedding_models = [
|
||||||
|
'text-embedding-ada-002'
|
||||||
|
]
|
||||||
|
|
||||||
|
from .timeout import timeout
|
||||||
|
from .utils import client, logging, exception_type # Import the logging / callback helpers from utils.py
|
||||||
from .main import * # Import all the symbols from main.py
|
from .main import * # Import all the symbols from main.py
|
||||||
|
|
||||||
|
|
|
@ -1,49 +1,77 @@
|
||||||
import os, openai, cohere, replicate, sys
|
import os, openai, cohere, replicate, sys
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from func_timeout import func_set_timeout, FunctionTimedOut
|
|
||||||
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||||
import json
|
|
||||||
import traceback
|
import traceback
|
||||||
import threading
|
|
||||||
import dotenv
|
import dotenv
|
||||||
import traceback
|
import traceback
|
||||||
import subprocess
|
import litellm
|
||||||
|
from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
|
||||||
|
import random
|
||||||
####### ENVIRONMENT VARIABLES ###################
|
####### ENVIRONMENT VARIABLES ###################
|
||||||
# Loading env variables using dotenv
|
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||||
dotenv.load_dotenv()
|
|
||||||
set_verbose = False
|
|
||||||
|
|
||||||
####### COMPLETION MODELS ###################
|
|
||||||
open_ai_chat_completion_models = [
|
|
||||||
'gpt-3.5-turbo',
|
|
||||||
'gpt-4'
|
|
||||||
]
|
|
||||||
open_ai_text_completion_models = [
|
|
||||||
'text-davinci-003'
|
|
||||||
]
|
|
||||||
|
|
||||||
cohere_models = [
|
|
||||||
'command-nightly',
|
|
||||||
]
|
|
||||||
|
|
||||||
anthropic_models = [
|
|
||||||
"claude-2",
|
|
||||||
"claude-instant-1"
|
|
||||||
]
|
|
||||||
|
|
||||||
####### EMBEDDING MODELS ###################
|
|
||||||
open_ai_embedding_models = [
|
|
||||||
'text-embedding-ada-002'
|
|
||||||
]
|
|
||||||
|
|
||||||
#############################################
|
|
||||||
|
|
||||||
|
def get_optional_params(
    # 12 optional params
    functions=[],
    function_call="",
    temperature=1,
    top_p=1,
    n=1,
    stream=False,
    stop=None,
    max_tokens=float('inf'),
    presence_penalty=0,
    frequency_penalty=0,
    logit_bias={},
    user="",
):
    """Return a dict holding only the optional params the caller changed.

    Each argument is compared with its default; arguments that still equal
    the default are left out, so the result can be splatted into a provider
    call without overriding that provider's own defaults. Note that a value
    explicitly passed but equal to the default (e.g. ``temperature=1``) is
    therefore omitted as well.

    The ``[]`` / ``{}`` defaults are only ever compared against, never
    mutated or returned, so sharing them across calls is harmless here.

    :return: dict mapping parameter name to value, in signature order.
    """
    # (key, value, "was it set?") rows, listed in the order keys are emitted.
    candidates = (
        ("functions", functions, functions != []),
        ("function_call", function_call, function_call != ""),
        ("temperature", temperature, temperature != 1),
        ("top_p", top_p, top_p != 1),
        ("n", n, n != 1),
        ("stream", stream, bool(stream)),
        # identity test: None is a singleton, `!= None` can be hijacked by __ne__
        ("stop", stop, stop is not None),
        ("max_tokens", max_tokens, max_tokens != float('inf')),
        ("presence_penalty", presence_penalty, presence_penalty != 0),
        ("frequency_penalty", frequency_penalty, frequency_penalty != 0),
        ("logit_bias", logit_bias, logit_bias != {}),
        ("user", user, user != ""),
    )
    return {key: value for key, value, is_set in candidates if is_set}
|
||||||
|
|
||||||
####### COMPLETION ENDPOINTS ################
|
####### COMPLETION ENDPOINTS ################
|
||||||
#############################################
|
#############################################
|
||||||
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
|
@client
|
||||||
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
|
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
|
||||||
|
def completion(
|
||||||
|
model, messages, # required params
|
||||||
|
# Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
|
||||||
|
functions=[], function_call="", # optional params
|
||||||
|
temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
|
||||||
|
presence_penalty=0, frequency_penalty=0, logit_bias={}, user="",
|
||||||
|
# Optional liteLLM function params
|
||||||
|
*, force_timeout=60, azure=False, logger_fn=None, verbose=False
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
|
# check if user passed in any of the OpenAI optional params
|
||||||
|
optional_params = get_optional_params(
|
||||||
|
functions=functions, function_call=function_call,
|
||||||
|
temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
|
||||||
|
presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user
|
||||||
|
)
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
openai.api_type = "azure"
|
openai.api_type = "azure"
|
||||||
|
@ -51,11 +79,39 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
||||||
openai.api_key = os.environ.get("AZURE_API_KEY")
|
openai.api_key = os.environ.get("AZURE_API_KEY")
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
## COMPLETION CALL
|
## COMPLETION CALL
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
engine=model,
|
engine=model,
|
||||||
messages = messages
|
messages = messages,
|
||||||
|
**optional_params
|
||||||
|
)
|
||||||
|
elif model in litellm.open_ai_chat_completion_models:
|
||||||
|
openai.api_type = "openai"
|
||||||
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
|
openai.api_version = None
|
||||||
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
|
|
||||||
|
## COMPLETION CALL
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
model=model,
|
||||||
|
messages = messages,
|
||||||
|
**optional_params
|
||||||
|
)
|
||||||
|
elif model in litellm.open_ai_text_completion_models:
|
||||||
|
openai.api_type = "openai"
|
||||||
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
|
openai.api_version = None
|
||||||
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
|
response = openai.Completion.create(
|
||||||
|
model=model,
|
||||||
|
prompt = prompt
|
||||||
)
|
)
|
||||||
elif "replicate" in model:
|
elif "replicate" in model:
|
||||||
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
|
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
|
||||||
|
@ -64,8 +120,8 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
|
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
|
||||||
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
|
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
input = [{"prompt": prompt}]
|
input = {"prompt": prompt}
|
||||||
if max_tokens:
|
if max_tokens != float('inf'):
|
||||||
input["max_length"] = max_tokens # for t5 models
|
input["max_length"] = max_tokens # for t5 models
|
||||||
input["max_new_tokens"] = max_tokens # for llama2 models
|
input["max_new_tokens"] = max_tokens # for llama2 models
|
||||||
## LOGGING
|
## LOGGING
|
||||||
|
@ -90,7 +146,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
response = new_response
|
response = new_response
|
||||||
elif model in anthropic_models:
|
elif model in litellm.anthropic_models:
|
||||||
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
|
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
|
||||||
prompt = f"{HUMAN_PROMPT}"
|
prompt = f"{HUMAN_PROMPT}"
|
||||||
for message in messages:
|
for message in messages:
|
||||||
|
@ -103,7 +159,8 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||||
prompt += f"{AI_PROMPT}"
|
prompt += f"{AI_PROMPT}"
|
||||||
anthropic = Anthropic()
|
anthropic = Anthropic()
|
||||||
if max_tokens:
|
# check if user passed in max_tokens != float('inf')
|
||||||
|
if max_tokens != float('inf'):
|
||||||
max_tokens_to_sample = max_tokens
|
max_tokens_to_sample = max_tokens
|
||||||
else:
|
else:
|
||||||
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
|
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
|
||||||
|
@ -127,9 +184,9 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
print(f"new response: {new_response}")
|
print_verbose(f"new response: {new_response}")
|
||||||
response = new_response
|
response = new_response
|
||||||
elif model in cohere_models:
|
elif model in litellm.cohere_models:
|
||||||
cohere_key = os.environ.get("COHERE_API_KEY")
|
cohere_key = os.environ.get("COHERE_API_KEY")
|
||||||
co = cohere.Client(cohere_key)
|
co = cohere.Client(cohere_key)
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
@ -146,7 +203,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"message": {
|
"message": {
|
||||||
"content": response[0],
|
"content": response[0].text,
|
||||||
"role": "assistant"
|
"role": "assistant"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -154,7 +211,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
}
|
}
|
||||||
response = new_response
|
response = new_response
|
||||||
|
|
||||||
elif model in open_ai_chat_completion_models:
|
elif model in litellm.open_ai_chat_completion_models:
|
||||||
openai.api_type = "openai"
|
openai.api_type = "openai"
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
|
@ -166,7 +223,7 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
model=model,
|
model=model,
|
||||||
messages = messages
|
messages = messages
|
||||||
)
|
)
|
||||||
elif model in open_ai_text_completion_models:
|
elif model in litellm.open_ai_text_completion_models:
|
||||||
openai.api_type = "openai"
|
openai.api_type = "openai"
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
|
@ -181,15 +238,21 @@ def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, l
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
|
args = locals()
|
||||||
|
raise ValueError(f"No valid completion model args passed in - {args}")
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
# log the original exception
|
||||||
raise e
|
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
|
||||||
|
## Map to OpenAI Exception
|
||||||
|
raise exception_type(model=model, original_exception=e)
|
||||||
|
|
||||||
|
|
||||||
### EMBEDDING ENDPOINTS ####################
|
### EMBEDDING ENDPOINTS ####################
|
||||||
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
|
@client
|
||||||
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
|
@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
|
||||||
|
def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
||||||
|
try:
|
||||||
response = None
|
response = None
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
|
@ -202,7 +265,7 @@ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
|
||||||
## EMBEDDING CALL
|
## EMBEDDING CALL
|
||||||
response = openai.Embedding.create(input=input, engine=model)
|
response = openai.Embedding.create(input=input, engine=model)
|
||||||
print_verbose(f"response_value: {str(response)[:50]}")
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
elif model in open_ai_embedding_models:
|
elif model in litellm.open_ai_embedding_models:
|
||||||
openai.api_type = "openai"
|
openai.api_type = "openai"
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
|
@ -214,216 +277,20 @@ def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
|
||||||
print_verbose(f"response_value: {str(response)[:50]}")
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
else:
|
else:
|
||||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
args = locals()
|
||||||
|
raise ValueError(f"No valid embedding model args passed in - {args}")
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
|
|
||||||
class litellm_client:
|
|
||||||
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
|
|
||||||
set_verbose = verbose
|
|
||||||
self.success_callback = success_callback
|
|
||||||
self.failure_callback = failure_callback
|
|
||||||
self.logger_fn = None # if user passes in their own logging function
|
|
||||||
self.callback_list = list(set(self.success_callback + self.failure_callback))
|
|
||||||
self.set_callbacks()
|
|
||||||
|
|
||||||
## COMPLETION CALL
|
|
||||||
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
|
|
||||||
try:
|
|
||||||
self.logger_fn = logger_fn
|
|
||||||
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
|
|
||||||
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
|
|
||||||
my_thread.start()
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
args = locals() # get all the param values
|
# log the original exception
|
||||||
self.handle_failure(e, args)
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
|
||||||
raise e
|
## Map to OpenAI Exception
|
||||||
|
raise exception_type(model=model, original_exception=e)
|
||||||
## EMBEDDING CALL
|
|
||||||
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
|
|
||||||
try:
|
|
||||||
self.logger_fn = logger_fn
|
|
||||||
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
|
|
||||||
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
|
|
||||||
my_thread.start()
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
args = locals() # get all the param values
|
|
||||||
self.handle_failure(e, args)
|
|
||||||
raise e
|
|
||||||
|
|
||||||
|
|
||||||
def set_callbacks(self): #instantiate any external packages
|
|
||||||
for callback in self.callback_list: # only install what's required
|
|
||||||
if callback == "sentry":
|
|
||||||
try:
|
|
||||||
import sentry_sdk
|
|
||||||
except ImportError:
|
|
||||||
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
|
|
||||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
|
|
||||||
import sentry_sdk
|
|
||||||
self.sentry_sdk = sentry_sdk
|
|
||||||
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
|
|
||||||
self.capture_exception = self.sentry_sdk.capture_exception
|
|
||||||
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
|
|
||||||
elif callback == "posthog":
|
|
||||||
try:
|
|
||||||
from posthog import Posthog
|
|
||||||
except:
|
|
||||||
print_verbose("Package 'posthog' is missing. Installing it...")
|
|
||||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
|
|
||||||
from posthog import Posthog
|
|
||||||
self.posthog = Posthog(
|
|
||||||
project_api_key=os.environ.get("POSTHOG_API_KEY"),
|
|
||||||
host=os.environ.get("POSTHOG_API_URL"))
|
|
||||||
elif callback == "slack":
|
|
||||||
try:
|
|
||||||
from slack_bolt import App
|
|
||||||
except ImportError:
|
|
||||||
print_verbose("Package 'slack_bolt' is missing. Installing it...")
|
|
||||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
|
|
||||||
from slack_bolt import App
|
|
||||||
self.slack_app = App(
|
|
||||||
token=os.environ.get("SLACK_API_TOKEN"),
|
|
||||||
signing_secret=os.environ.get("SLACK_API_SECRET")
|
|
||||||
)
|
|
||||||
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
|
|
||||||
|
|
||||||
def handle_input(self, model_call_details={}):
|
|
||||||
if len(model_call_details.keys()) > 0:
|
|
||||||
model = model_call_details["model"] if "model" in model_call_details else None
|
|
||||||
if model:
|
|
||||||
for callback in self.callback_list:
|
|
||||||
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
|
|
||||||
self.add_breadcrumb(
|
|
||||||
category=f'{model}',
|
|
||||||
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
|
|
||||||
level='info',
|
|
||||||
)
|
|
||||||
if self.logger_fn and callable(self.logger_fn):
|
|
||||||
self.logger_fn(model_call_details)
|
|
||||||
pass
|
|
||||||
|
|
||||||
def handle_success(self, model, messages, additional_details):
|
|
||||||
success_handler = additional_details.pop("success_handler", None)
|
|
||||||
failure_handler = additional_details.pop("failure_handler", None)
|
|
||||||
additional_details["litellm_model"] = str(model)
|
|
||||||
additional_details["litellm_messages"] = str(messages)
|
|
||||||
for callback in self.success_callback:
|
|
||||||
try:
|
|
||||||
if callback == "posthog":
|
|
||||||
ph_obj = {}
|
|
||||||
for detail in additional_details:
|
|
||||||
ph_obj[detail] = additional_details[detail]
|
|
||||||
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query"
|
|
||||||
if "user_id" in additional_details:
|
|
||||||
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
|
||||||
else:
|
|
||||||
self.posthog.capture(event_name, ph_obj)
|
|
||||||
pass
|
|
||||||
elif callback == "slack":
|
|
||||||
slack_msg = ""
|
|
||||||
if len(additional_details.keys()) > 0:
|
|
||||||
for detail in additional_details:
|
|
||||||
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
|
||||||
slack_msg += f"Successful call"
|
|
||||||
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if success_handler and callable(success_handler):
|
|
||||||
call_details = {
|
|
||||||
"model": model,
|
|
||||||
"messages": messages,
|
|
||||||
"additional_details": additional_details
|
|
||||||
}
|
|
||||||
success_handler(call_details)
|
|
||||||
pass
|
|
||||||
|
|
||||||
def handle_failure(self, exception, args):
|
|
||||||
args.pop("self")
|
|
||||||
additional_details = args.pop("additional_details", {})
|
|
||||||
|
|
||||||
success_handler = additional_details.pop("success_handler", None)
|
|
||||||
failure_handler = additional_details.pop("failure_handler", None)
|
|
||||||
|
|
||||||
for callback in self.failure_callback:
|
|
||||||
try:
|
|
||||||
if callback == "slack":
|
|
||||||
slack_msg = ""
|
|
||||||
for param in args:
|
|
||||||
slack_msg += f"{param}: {args[param]}\n"
|
|
||||||
if len(additional_details.keys()) > 0:
|
|
||||||
for detail in additional_details:
|
|
||||||
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
|
||||||
slack_msg += f"Traceback: {traceback.format_exc()}"
|
|
||||||
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
|
||||||
elif callback == "sentry":
|
|
||||||
self.capture_exception(exception)
|
|
||||||
elif callback == "posthog":
|
|
||||||
if len(additional_details.keys()) > 0:
|
|
||||||
ph_obj = {}
|
|
||||||
for param in args:
|
|
||||||
ph_obj[param] += args[param]
|
|
||||||
for detail in additional_details:
|
|
||||||
ph_obj[detail] = additional_details[detail]
|
|
||||||
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
|
|
||||||
if "user_id" in additional_details:
|
|
||||||
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
|
||||||
else:
|
|
||||||
self.posthog.capture(event_name, ph_obj)
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
except:
|
|
||||||
print(f"got an error calling {callback} - {traceback.format_exc()}")
|
|
||||||
|
|
||||||
if failure_handler and callable(failure_handler):
|
|
||||||
call_details = {
|
|
||||||
"exception": exception,
|
|
||||||
"additional_details": additional_details
|
|
||||||
}
|
|
||||||
failure_handler(call_details)
|
|
||||||
pass
|
|
||||||
####### HELPER FUNCTIONS ################
|
####### HELPER FUNCTIONS ################
|
||||||
|
## Set verbose to true -> ```litellm.set_verbose = True```
|
||||||
#Logging function -> log the exact model details + what's being sent | Non-Blocking
|
|
||||||
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
|
|
||||||
try:
|
|
||||||
model_call_details = {}
|
|
||||||
model_call_details["model"] = model
|
|
||||||
model_call_details["input"] = input
|
|
||||||
model_call_details["azure"] = azure
|
|
||||||
model_call_details["additional_args"] = additional_args
|
|
||||||
if logger_fn and callable(logger_fn):
|
|
||||||
try:
|
|
||||||
# log additional call details -> api key, etc.
|
|
||||||
if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models:
|
|
||||||
model_call_details["api_type"] = openai.api_type
|
|
||||||
model_call_details["api_base"] = openai.api_base
|
|
||||||
model_call_details["api_version"] = openai.api_version
|
|
||||||
model_call_details["api_key"] = openai.api_key
|
|
||||||
elif "replicate" in model:
|
|
||||||
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
|
|
||||||
elif model in anthropic_models:
|
|
||||||
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
|
|
||||||
elif model in cohere_models:
|
|
||||||
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
|
|
||||||
|
|
||||||
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
|
|
||||||
except:
|
|
||||||
print_verbose(f"Basic model call details: {model_call_details}")
|
|
||||||
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
print_verbose(f"Basic model call details: {model_call_details}")
|
|
||||||
pass
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
## Set verbose to true -> ```litellm.verbose = True```
|
|
||||||
def print_verbose(print_statement):
|
def print_verbose(print_statement):
|
||||||
if set_verbose:
|
if litellm.set_verbose:
|
||||||
print(f"LiteLLM: {print_statement}")
|
print(f"LiteLLM: {print_statement}")
|
||||||
|
if random.random() <= 0.3:
|
||||||
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
||||||
|
|
||||||
|
|
80
build/lib/litellm/timeout.py
Normal file
80
build/lib/litellm/timeout.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
"""
|
||||||
|
Module containing "timeout" decorator for sync and async callables.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from concurrent import futures
|
||||||
|
from inspect import iscoroutinefunction
|
||||||
|
from functools import wraps
|
||||||
|
from threading import Thread
|
||||||
|
from openai.error import Timeout
|
||||||
|
|
||||||
|
|
||||||
|
def timeout(
    timeout_duration: float = None, exception_to_raise = Timeout
):
    """
    Wraps a function to raise the specified exception if execution time
    is greater than the specified timeout.

    Works with both synchronous and asynchronous callables, but with synchronous ones will introduce
    some overhead due to the backend use of threads and asyncio.

    A per-call ``force_timeout`` keyword argument, when present, overrides
    ``timeout_duration`` for that call (it is still forwarded to the wrapped
    callable).

    NOTE: a synchronous callable is executed inside a coroutine that never
    awaits, so after a timeout it is not interrupted; the caller simply stops
    waiting for it.

    :param float timeout_duration: Timeout duration in seconds. If none callable won't time out.
    :param OpenAIError exception_to_raise: Exception to raise when the callable times out.
        Defaults to ``openai.error.Timeout``.
    :return: The decorated function.
    :rtype: callable
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            async def async_func():
                return func(*args, **kwargs)

            thread = _LoopWrapper()
            thread.start()
            future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
            local_timeout_duration = kwargs.get("force_timeout", timeout_duration)
            try:
                return future.result(timeout=local_timeout_duration)
            except futures.TimeoutError:
                raise exception_to_raise()
            finally:
                # Always release the helper loop thread - also when the wrapped
                # function raised its own exception, which previously leaked it.
                thread.stop_loop()

        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            # Honour `force_timeout` the same way the synchronous wrapper does.
            local_timeout_duration = kwargs.get("force_timeout", timeout_duration)
            try:
                return await asyncio.wait_for(
                    func(*args, **kwargs), timeout=local_timeout_duration
                )
            except asyncio.TimeoutError:
                raise exception_to_raise()

        if iscoroutinefunction(func):
            return async_wrapper
        return wrapper

    return decorator
|
||||||
|
|
||||||
|
|
||||||
|
class _LoopWrapper(Thread):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__(daemon=True)
|
||||||
|
self.loop = asyncio.new_event_loop()
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
self.loop.run_forever()
|
||||||
|
self.loop.call_soon_threadsafe(self.loop.close)
|
||||||
|
|
||||||
|
def stop_loop(self):
|
||||||
|
for task in asyncio.all_tasks(self.loop):
|
||||||
|
task.cancel()
|
||||||
|
self.loop.call_soon_threadsafe(self.loop.stop)
|
316
build/lib/litellm/utils.py
Normal file
316
build/lib/litellm/utils.py
Normal file
|
@ -0,0 +1,316 @@
|
||||||
|
import dotenv, json, traceback, threading
|
||||||
|
import subprocess, os
|
||||||
|
import litellm, openai
|
||||||
|
import random, uuid, requests
|
||||||
|
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
|
||||||
|
####### ENVIRONMENT VARIABLES ###################
|
||||||
|
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||||
|
sentry_sdk_instance = None
|
||||||
|
capture_exception = None
|
||||||
|
add_breadcrumb = None
|
||||||
|
posthog = None
|
||||||
|
slack_app = None
|
||||||
|
alerts_channel = None
|
||||||
|
callback_list = []
|
||||||
|
user_logger_fn = None
|
||||||
|
additional_details = {}
|
||||||
|
|
||||||
|
def print_verbose(print_statement):
    """Print ``print_statement`` with a ``LiteLLM:`` prefix when verbose mode is on.

    Does nothing unless ``litellm.set_verbose`` is truthy. After printing,
    a help link is additionally shown when a random draw is <= 0.3.
    """
    if not litellm.set_verbose:
        return
    print(f"LiteLLM: {print_statement}")
    show_help_link = random.random() <= 0.3
    if show_help_link:
        print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
||||||
|
|
||||||
|
####### LOGGING ###################
|
||||||
|
#Logging function -> log the exact model details + what's being sent | Non-Blocking
|
||||||
|
def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
    """Assemble the details of a model call and pass them to the loggers.

    Builds a ``model_call_details`` dict (model, azure flag, optional original
    exception, input, provider API settings, additional args), prints it via
    ``print_verbose`` and, when ``logger_fn`` is callable, passes it to
    ``logger_fn``. When ``litellm.telemetry`` is truthy,
    ``safe_crash_reporting`` is called as well.

    NOTE: the details include the provider API key, so a custom ``logger_fn``
    (and verbose output) will see it.

    :param model: model name used for the call.
    :param input: messages / prompt / embedding input that was sent.
    :param azure: whether the call targets Azure OpenAI.
    :param additional_args: extra values to attach; stored as-is, not mutated.
    :param logger_fn: optional callable accepting the details dict.
    :param exception: original exception, when logging a failure.
    :return: None. Exceptions raised while logging are printed, not propagated.
    """
    try:
        model_call_details = {"model": model, "azure": azure}
        # log exception details
        if exception:
            model_call_details["original_exception"] = exception

        if litellm.telemetry:
            safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.

        model_call_details["input"] = input
        # log additional call details -> api key, etc.
        # Text-completion models also go through the openai SDK; the original
        # condition tested the chat-completion list twice and missed them.
        uses_openai_sdk = (
            azure == True
            or model in litellm.open_ai_chat_completion_models
            or model in litellm.open_ai_text_completion_models
            or model in litellm.open_ai_embedding_models
        )
        if uses_openai_sdk:
            model_call_details["api_type"] = openai.api_type
            model_call_details["api_base"] = openai.api_base
            model_call_details["api_version"] = openai.api_version
            model_call_details["api_key"] = openai.api_key
        elif "replicate" in model:
            model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
        elif model in litellm.anthropic_models:
            model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
        elif model in litellm.cohere_models:
            model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
        model_call_details["additional_args"] = additional_args
        ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
        print_verbose(f"Basic model call details: {model_call_details}")
        if logger_fn and callable(logger_fn):
            try:
                logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
            except Exception:
                print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
    except Exception:
        # Logging must never break the model call; report and carry on.
        traceback.print_exc()
|
||||||
|
|
||||||
|
####### CLIENT ###################
|
||||||
|
# make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
|
||||||
|
def client(original_function):
    """Decorate an LLM entry point with non-blocking success/failure logging.

    The returned wrapper initialises the configured callback integrations the
    first time callbacks are present, runs ``original_function`` and then hands
    the outcome to ``handle_success`` / ``handle_failure`` on a background
    thread so that logging does not delay the caller.

    Args:
        original_function: The callable to wrap.

    Returns:
        A wrapper with the same call signature and return value as
        ``original_function``. Exceptions raised by the wrapped call are
        re-raised to the caller after the failure handler has been scheduled.
    """
    import functools  # local import so the block does not depend on file-level imports

    def function_setup(*args, **kwargs):
        """Initialise callbacks once and record a breadcrumb for this call."""
        global callback_list, add_breadcrumb
        try:
            callbacks_requested = len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
            if callbacks_requested and len(callback_list) == 0:
                # De-duplicate so an integration listed for both success and
                # failure is only initialised once.
                callback_list = list(set(litellm.success_callback + litellm.failure_callback))
                set_callbacks(callback_list=callback_list)
            if add_breadcrumb:
                add_breadcrumb(
                    category="litellm.llm_call",
                    message=f"Positional Args: {args}, Keyword Args: {kwargs}",
                    level="info",
                )
        except Exception:  # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}")

    @functools.wraps(original_function)  # keep the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        try:
            # Unpack the arguments: passing the tuple/dict as two positionals
            # made the breadcrumb report them as "positional args" with empty
            # keyword args.
            function_setup(*args, **kwargs)
            ## MODEL CALL
            result = original_function(*args, **kwargs)
            ## LOG SUCCESS
            # Background thread: don't interrupt execution of the main thread.
            success_thread = threading.Thread(target=handle_success, args=(args, kwargs))
            success_thread.start()
            return result
        except Exception as e:
            traceback_exception = traceback.format_exc()
            # Background thread: don't interrupt execution of the main thread.
            failure_thread = threading.Thread(
                target=handle_failure,
                args=(e, traceback_exception, args, kwargs),
            )
            failure_thread.start()
            raise  # re-raise the active exception unchanged

    return wrapper
|
||||||
|
|
||||||
|
####### HELPER FUNCTIONS ################
|
||||||
|
def set_callbacks(callback_list):
    """Initialise the integrations named in ``callback_list``.

    Recognised names are ``"sentry"``, ``"posthog"`` and ``"slack"``; any other
    entry is ignored. Each integration stores its client in the corresponding
    module-level global. A missing client package is installed with ``pip``
    at runtime before being imported.

    Setup is best-effort: a failure while initialising one integration is
    logged through ``print_verbose`` and does not prevent the remaining ones
    from being initialised.

    Args:
        callback_list: Iterable of integration names; ``None`` is treated as
            empty.

    Returns:
        None.
    """
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
    for callback in callback_list or ():
        # Guard each integration separately; previously a single failure
        # silently aborted the whole loop.
        try:
            if callback == "sentry":
                try:
                    import sentry_sdk
                except ImportError:
                    print_verbose("Package 'sentry_sdk' is missing. Installing it...")
                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
                    import sentry_sdk
                # An unset trace rate used to raise inside float(None) and
                # abort setup; leave tracing unconfigured instead.
                trace_rate = os.environ.get("SENTRY_API_TRACE_RATE")
                sentry_sdk_instance = sentry_sdk
                sentry_sdk_instance.init(
                    dsn=os.environ.get("SENTRY_API_URL"),
                    traces_sample_rate=float(trace_rate) if trace_rate is not None else None,
                )
                capture_exception = sentry_sdk_instance.capture_exception
                add_breadcrumb = sentry_sdk_instance.add_breadcrumb
            elif callback == "posthog":
                try:
                    from posthog import Posthog
                except ImportError:
                    print_verbose("Package 'posthog' is missing. Installing it...")
                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
                    from posthog import Posthog
                posthog = Posthog(
                    project_api_key=os.environ.get("POSTHOG_API_KEY"),
                    host=os.environ.get("POSTHOG_API_URL"),
                )
            elif callback == "slack":
                try:
                    from slack_bolt import App
                except ImportError:
                    print_verbose("Package 'slack_bolt' is missing. Installing it...")
                    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
                    from slack_bolt import App
                slack_app = App(
                    token=os.environ.get("SLACK_API_TOKEN"),
                    signing_secret=os.environ.get("SLACK_API_SECRET"),
                )
                # Indexing (not .get) on purpose: a missing channel is a
                # configuration error that is reported below.
                alerts_channel = os.environ["SLACK_API_CHANNEL"]
                print_verbose(f"Initialized Slack App: {slack_app}")
        except Exception:
            print_verbose(
                f"[Non-Blocking] Error occurred while setting up callback '{callback}': {traceback.format_exc()}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
def handle_failure(exception, traceback_exception, args, kwargs):
    """Report a failed call to every configured failure callback.

    Supported callbacks are ``"slack"`` (posts a message to the alerts
    channel), ``"sentry"`` (captures the exception) and ``"posthog"``
    (captures an event). After the callbacks have run, a user-supplied
    ``failure_handler`` found in the module-level ``additional_details`` dict
    is invoked, if present and callable.

    The whole routine is best-effort: errors are logged through
    ``print_verbose`` and are not propagated.

    Args:
        exception: The exception raised by the wrapped call.
        traceback_exception: Formatted traceback string for ``exception``.
        args: Positional arguments of the failed call (a tuple).
        kwargs: Keyword arguments of the failed call (a dict).

    Returns:
        None.
    """
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
    try:
        print_verbose(f"handle_failure args: {args}")
        print_verbose(f"handle_failure kwargs: {kwargs}")

        # Pop both handler entries out of the shared details dict; only the
        # failure handler is used in this function.
        additional_details.pop("success_handler", None)
        failure_handler = additional_details.pop("failure_handler", None)

        additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
        print_verbose(f"self.failure_callback: {litellm.failure_callback}")

        print_verbose(f"additional_details: {additional_details}")
        for callback in litellm.failure_callback:
            try:
                if callback == "slack":
                    slack_lines = [f"{key}: {value}\n" for key, value in kwargs.items()]
                    # Each positional arg gets its own line (the newline was
                    # missing, so args ran into the following entry).
                    slack_lines.extend(f"LiteLLM_Args_{i}: {arg}\n" for i, arg in enumerate(args))
                    slack_lines.extend(f"{detail}: {value}\n" for detail, value in additional_details.items())
                    slack_lines.append(f"Traceback: {traceback_exception}")
                    slack_app.client.chat_postMessage(channel=alerts_channel, text="".join(slack_lines))
                elif callback == "sentry":
                    capture_exception(exception)
                elif callback == "posthog":
                    print_verbose(f"inside posthog, additional_details: {len(additional_details.keys())}")
                    # Copy so the caller's kwargs dict is not mutated.
                    ph_obj = dict(kwargs)
                    for i, arg in enumerate(args):
                        ph_obj["litellm_args_" + str(i)] = arg
                    ph_obj.update(additional_details)
                    event_name = additional_details["Event_Name"]
                    print_verbose(f"ph_obj: {ph_obj}")
                    print_verbose(f"PostHog Event Name: {event_name}")
                    if "user_id" in additional_details:
                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
                    else:  # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                        unique_id = str(uuid.uuid4())
                        # Send the properties here too; they used to be dropped
                        # for calls without a user_id.
                        posthog.capture(unique_id, event_name, ph_obj)
                    print_verbose(f"successfully logged to PostHog!")
            except Exception:
                print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")

        if failure_handler and callable(failure_handler):
            call_details = {
                "exception": exception,
                "additional_details": additional_details,
            }
            failure_handler(call_details)
    except Exception:
        # Logging must never raise, but don't hide the reason either.
        print_verbose(f"[Non-Blocking] Error occurred in handle_failure: {traceback.format_exc()}")
|
||||||
|
|
||||||
|
def handle_success(*args, **kwargs):
    """Report a successful call to every configured success callback.

    Supported callbacks are ``"posthog"`` (captures an event carrying the
    module-level ``additional_details``) and ``"slack"`` (posts those details
    to the alerts channel). After the callbacks have run, a user-supplied
    ``success_handler`` found in ``additional_details`` is invoked as
    ``success_handler(args, kwargs)``, if present and callable.

    The whole routine is best-effort: errors are logged through
    ``print_verbose`` and are not propagated.

    Args:
        *args: Positional values forwarded to the success handler as one tuple.
        **kwargs: Keyword values forwarded to the success handler as one dict.

    Returns:
        None.
    """
    try:
        success_handler = additional_details.pop("success_handler", None)
        # Pop the failure handler out of the shared details dict as well; it
        # is not used in this function.
        additional_details.pop("failure_handler", None)
        # NOTE(review): "succes_query" looks like a typo, but it is the event
        # name emitted at runtime, so it is left unchanged here.
        additional_details["Event_Name"] = additional_details.pop("successful_event_name", "litellm.succes_query")
        for callback in litellm.success_callback:
            try:
                if callback == "posthog":
                    ph_obj = dict(additional_details)
                    event_name = additional_details["Event_Name"]
                    if "user_id" in additional_details:
                        posthog.capture(additional_details["user_id"], event_name, ph_obj)
                    else:  # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
                        unique_id = str(uuid.uuid4())
                        posthog.capture(unique_id, event_name, ph_obj)
                elif callback == "slack":
                    slack_msg = "".join(
                        f"{detail}: {value}\n" for detail, value in additional_details.items()
                    )
                    slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
            except Exception:
                # Log per-callback errors, matching handle_failure, instead of
                # swallowing them silently.
                print_verbose(f"Error Occurred while logging success: {traceback.format_exc()}")

        if success_handler and callable(success_handler):
            success_handler(args, kwargs)
    except Exception:
        # Logging must never raise, but don't hide the reason either.
        print_verbose(f"[Non-Blocking] Error occurred in handle_success: {traceback.format_exc()}")
|
||||||
|
|
||||||
|
|
||||||
|
def exception_type(model, original_exception):
    """Raise the OpenAI-style exception that corresponds to a provider error.

    Errors that are already ``OpenAIError`` instances are re-raised as they
    are. For other providers the error is translated based on the model name:

    * models containing ``"claude"``: by ``status_code`` (401, 400, 429);
    * models containing ``"replicate"``: by message text / exception class name;
    * ``"command-nightly"``: by message text / exception class name.

    If no rule matches, or the translation itself fails, the original
    exception is raised so the caller always sees the provider's error.

    Args:
        model: Name of the model that was called; may be falsy.
        original_exception: The exception raised by the provider call.

    Raises:
        AuthenticationError, InvalidRequestError, RateLimitError,
        ServiceUnavailableError: When a mapping rule matches (chained to the
            original exception).
        Exception: ``original_exception`` itself in every other case.

    This function never returns normally.
    """
    mapped_exception = None
    try:
        # The mapped exception is only *built* inside this try block and raised
        # after it. Raising inside the block let the fallback handler catch the
        # mapped exception and replace it with the original one, which made
        # every mapping rule a no-op.
        if isinstance(original_exception, OpenAIError):
            pass  # already in the target hierarchy; raised unchanged below
        elif model:
            error_str = str(original_exception)
            if isinstance(original_exception, BaseException):
                exception_name = type(original_exception).__name__
            else:
                exception_name = ""
            # Not every exception class exposes `.message`; fall back to str().
            message = getattr(original_exception, "message", error_str)

            if "claude" in model:  # one of the anthropics
                # hasattr, not `in`: exceptions are not containers, so the
                # membership test raised TypeError.
                if hasattr(original_exception, "status_code"):
                    status_code = original_exception.status_code
                    print_verbose(f"status_code: {status_code}")
                    if status_code == 401:
                        mapped_exception = AuthenticationError(f"AnthropicException - {message}")
                    elif status_code == 400:
                        mapped_exception = InvalidRequestError(f"AnthropicException - {message}", f"{model}")
                    elif status_code == 429:
                        mapped_exception = RateLimitError(f"AnthropicException - {message}")
            elif "replicate" in model:
                if "Incorrect authentication token" in error_str:
                    mapped_exception = AuthenticationError(f"ReplicateException - {error_str}")
                elif exception_name == "ModelError":
                    mapped_exception = InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
                elif "Request was throttled" in error_str:
                    mapped_exception = RateLimitError(f"ReplicateException - {error_str}")
                elif exception_name == "ReplicateError":  ## ReplicateError implies an error on Replicate server side, not user side
                    mapped_exception = ServiceUnavailableError(f"ReplicateException - {error_str}")
            elif model == "command-nightly":  # Cohere
                if "invalid api token" in error_str or "No API key provided." in error_str:
                    mapped_exception = AuthenticationError(f"CohereException - {error_str}")
                elif "too many tokens" in error_str:
                    mapped_exception = InvalidRequestError(f"CohereException - {error_str}", f"{model}")
                elif "CohereConnectionError" in exception_name:  # cohere seems to fire these errors when we load test it (1k+ messages / min)
                    mapped_exception = RateLimitError(f"CohereException - {message}")
    except Exception:
        # Don't let an error in the mapping logic keep the user from receiving
        # the error from the LLM API call.
        mapped_exception = None

    if mapped_exception is not None:
        raise mapped_exception from original_exception
    raise original_exception  # base case - surface the original exception
|
||||||
|
|
||||||
|
def safe_crash_reporting(model=None, exception=None, azure=None):
    """Send crash details to ``litellm_telemetry`` on a daemon thread.

    Only the model name, the stringified exception and the azure flag are
    included in the report.

    Args:
        model: Name of the model involved in the failed call.
        exception: The exception that occurred; reported as ``str(exception)``.
        azure: Azure flag of the failed call, passed through as given.

    Returns:
        None. The report is dispatched in the background.
    """
    crash_details = dict(model=model, exception=str(exception), azure=azure)
    reporter = threading.Thread(
        target=litellm_telemetry,
        args=(crash_details,),
        daemon=True,
    )
    reporter.start()
|
||||||
|
|
||||||
|
def litellm_telemetry(data):
    """POST ``data`` together with a persistent installation UUID.

    The UUID is read from ``litellm_uuid.txt`` in the current working
    directory. If the file is missing, unreadable or blank, a new UUID is
    generated and written back (when the directory is writable). The payload
    ``{'uuid': ..., 'data': data}`` is then sent as JSON to the logging
    endpoint.

    Telemetry is best-effort: file and network errors are not propagated.

    Args:
        data: JSON-serialisable details to report.

    Returns:
        None.
    """
    uuid_file = 'litellm_uuid.txt'
    request_timeout_seconds = 10  # never let a stalled connection hang the thread

    # Load the stored UUID, if any.
    uuid_value = ""
    try:
        with open(uuid_file, 'r') as file:
            # Strip before the emptiness check so a whitespace-only file is
            # treated as empty instead of yielding an empty identifier.
            uuid_value = file.read().strip()
    except OSError:
        # Missing or unreadable file: fall through and generate a new id.
        pass

    if not uuid_value:
        # Generate a new UUID if the file doesn't exist or is empty.
        uuid_value = str(uuid.uuid4())
        try:
            with open(uuid_file, 'w') as file:
                file.write(uuid_value)
        except OSError:
            # Unwritable working directory: report with an unsaved id rather
            # than crashing the telemetry thread.
            print_verbose(f"[Non-Blocking] Could not persist telemetry uuid to {uuid_file}")

    # Prepare the data to send to the logging endpoint.
    payload = {
        'uuid': uuid_value,
        'data': data
    }
    print_verbose(f"payload: {payload}")
    try:
        # Make the POST request to the logging endpoint.
        response = requests.post(
            'https://litellm.berri.ai/logging',
            json=payload,
            timeout=request_timeout_seconds,
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
    except requests.exceptions.RequestException as e:
        # Telemetry failures must never affect the caller.
        print_verbose(f"[Non-Blocking] Telemetry request failed: {e}")
|
BIN
dist/litellm-0.1.2-py3-none-any.whl
vendored
BIN
dist/litellm-0.1.2-py3-none-any.whl
vendored
Binary file not shown.
BIN
dist/litellm-0.1.2.tar.gz
vendored
BIN
dist/litellm-0.1.2.tar.gz
vendored
Binary file not shown.
BIN
dist/litellm-0.1.216-py3-none-any.whl
vendored
Normal file
BIN
dist/litellm-0.1.216-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
dist/litellm-0.1.216.tar.gz
vendored
Normal file
BIN
dist/litellm-0.1.216.tar.gz
vendored
Normal file
Binary file not shown.
|
@ -1,6 +1,6 @@
|
||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: litellm
|
Name: litellm
|
||||||
Version: 0.1.207
|
Version: 0.1.216
|
||||||
Summary: Library to easily interface with LLM API providers
|
Summary: Library to easily interface with LLM API providers
|
||||||
Author: BerriAI
|
Author: BerriAI
|
||||||
License-File: LICENSE
|
License-File: LICENSE
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
LICENSE
|
LICENSE
|
||||||
README.md
|
README.md
|
||||||
|
pyproject.toml
|
||||||
setup.py
|
setup.py
|
||||||
litellm/__init__.py
|
litellm/__init__.py
|
||||||
litellm/main.py
|
litellm/main.py
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -57,3 +57,4 @@ def test_good_azure_embedding():
|
||||||
print(f"response: {str(response)[:50]}")
|
print(f"response: {str(response)[:50]}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
23
litellm/tests/test_no_client.py
Normal file
23
litellm/tests/test_no_client.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
#### What this tests ####
#    Error logging (with custom user functions) for the `completion` and
#    `embedding` endpoints when no callbacks (slack, posthog, etc.) are set.
#    Requirements: remove any env keys related to slack/posthog/etc., plus the
#    anthropic api key (so that an exception is raised).

import sys, os
import traceback

# Put the directory two levels above the working directory on the import path.
sys.path.insert(0, os.path.abspath('../..'))

import litellm
from litellm import embedding, completion

litellm.set_verbose = True

model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]

# Exercise both endpoints for every model; failures are printed, not raised,
# so that each model in the list is attempted.
for model in model_fallback_list:
    try:
        response = embedding(model="text-embedding-ada-002", input=[user_message])
        response = completion(model=model, messages=messages)
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")
|
|
@ -234,6 +234,7 @@ def handle_success(*args, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
def exception_type(model, original_exception):
|
def exception_type(model, original_exception):
|
||||||
|
try:
|
||||||
if isinstance(original_exception, OpenAIError):
|
if isinstance(original_exception, OpenAIError):
|
||||||
# Handle the OpenAIError
|
# Handle the OpenAIError
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
@ -244,6 +245,7 @@ def exception_type(model, original_exception):
|
||||||
else:
|
else:
|
||||||
exception_type = ""
|
exception_type = ""
|
||||||
if "claude" in model: #one of the anthropics
|
if "claude" in model: #one of the anthropics
|
||||||
|
if "status_code" in original_exception:
|
||||||
print_verbose(f"status_code: {original_exception.status_code}")
|
print_verbose(f"status_code: {original_exception.status_code}")
|
||||||
if original_exception.status_code == 401:
|
if original_exception.status_code == 401:
|
||||||
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
|
raise AuthenticationError(f"AnthropicException - {original_exception.message}")
|
||||||
|
@ -270,6 +272,8 @@ def exception_type(model, original_exception):
|
||||||
raise original_exception # base case - return the original exception
|
raise original_exception # base case - return the original exception
|
||||||
else:
|
else:
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
except:
|
||||||
|
raise original_exception
|
||||||
|
|
||||||
def safe_crash_reporting(model=None, exception=None, azure=None):
|
def safe_crash_reporting(model=None, exception=None, azure=None):
|
||||||
data = {
|
data = {
|
||||||
|
@ -277,11 +281,9 @@ def safe_crash_reporting(model=None, exception=None, azure=None):
|
||||||
"exception": str(exception),
|
"exception": str(exception),
|
||||||
"azure": azure
|
"azure": azure
|
||||||
}
|
}
|
||||||
print(f"data in crash reporting: {data}")
|
|
||||||
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
|
threading.Thread(target=litellm_telemetry, args=(data,), daemon=True).start()
|
||||||
|
|
||||||
def litellm_telemetry(data):
|
def litellm_telemetry(data):
|
||||||
print(f"data in in litellm telemetry: {data}")
|
|
||||||
# Load or generate the UUID
|
# Load or generate the UUID
|
||||||
uuid_file = 'litellm_uuid.txt'
|
uuid_file = 'litellm_uuid.txt'
|
||||||
try:
|
try:
|
||||||
|
@ -290,7 +292,6 @@ def litellm_telemetry(data):
|
||||||
uuid_value = file.read()
|
uuid_value = file.read()
|
||||||
if uuid_value:
|
if uuid_value:
|
||||||
uuid_value = uuid_value.strip()
|
uuid_value = uuid_value.strip()
|
||||||
print(f"Loaded UUID: {uuid_value}")
|
|
||||||
else:
|
else:
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
@ -299,7 +300,6 @@ def litellm_telemetry(data):
|
||||||
uuid_value = str(new_uuid)
|
uuid_value = str(new_uuid)
|
||||||
with open(uuid_file, 'w') as file:
|
with open(uuid_file, 'w') as file:
|
||||||
file.write(uuid_value)
|
file.write(uuid_value)
|
||||||
print(f"Generated and stored UUID: {uuid_value}")
|
|
||||||
|
|
||||||
# Prepare the data to send to localhost:3000
|
# Prepare the data to send to localhost:3000
|
||||||
payload = {
|
payload = {
|
||||||
|
@ -311,7 +311,6 @@ def litellm_telemetry(data):
|
||||||
# Make the POST request to localhost:3000
|
# Make the POST request to localhost:3000
|
||||||
response = requests.post('https://litellm.berri.ai/logging', json=payload)
|
response = requests.post('https://litellm.berri.ai/logging', json=payload)
|
||||||
response.raise_for_status() # Raise an exception for HTTP errors
|
response.raise_for_status() # Raise an exception for HTTP errors
|
||||||
print('Request successfully sent!')
|
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
# Handle any errors in the request
|
# Handle any errors in the request
|
||||||
print(f'Error: {e}')
|
pass
|
2
setup.py
2
setup.py
|
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='litellm',
|
name='litellm',
|
||||||
version='0.1.214',
|
version='0.1.216',
|
||||||
description='Library to easily interface with LLM API providers',
|
description='Library to easily interface with LLM API providers',
|
||||||
author='BerriAI',
|
author='BerriAI',
|
||||||
packages=[
|
packages=[
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue