forked from phoenix/litellm-mirror
version 0.1.2
This commit is contained in:
parent
740208d643
commit
fa65f960e3
18 changed files with 888 additions and 234 deletions
BIN
.DS_Store
vendored
BIN
.DS_Store
vendored
Binary file not shown.
|
@ -1,5 +1,4 @@
|
||||||
OPENAI_API_KEY = ""
|
OPENAI_API_KEY = ""
|
||||||
COHERE_API_KEY = ""
|
COHERE_API_KEY = ""
|
||||||
OPENROUTER_API_KEY = ""
|
|
||||||
OR_SITE_URL = ""
|
OR_SITE_URL = ""
|
||||||
OR_APP_NAME = "LiteLLM Example app"
|
OR_APP_NAME = "LiteLLM Example app"
|
|
@ -1,7 +1,17 @@
|
||||||
import os, openai, cohere, dotenv
|
import os, openai, cohere, replicate, sys
|
||||||
|
from typing import Any
|
||||||
|
from func_timeout import func_set_timeout, FunctionTimedOut
|
||||||
|
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||||
|
import json
|
||||||
|
import traceback
|
||||||
|
import threading
|
||||||
|
import dotenv
|
||||||
|
import traceback
|
||||||
|
import subprocess
|
||||||
|
####### ENVIRONMENT VARIABLES ###################
|
||||||
# Loading env variables using dotenv
|
# Loading env variables using dotenv
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
set_verbose = False
|
||||||
|
|
||||||
####### COMPLETION MODELS ###################
|
####### COMPLETION MODELS ###################
|
||||||
open_ai_chat_completion_models = [
|
open_ai_chat_completion_models = [
|
||||||
|
@ -16,16 +26,9 @@ cohere_models = [
|
||||||
'command-nightly',
|
'command-nightly',
|
||||||
]
|
]
|
||||||
|
|
||||||
openrouter_models = [
|
anthropic_models = [
|
||||||
'google/palm-2-codechat-bison',
|
"claude-2",
|
||||||
'google/palm-2-chat-bison',
|
"claude-instant-1"
|
||||||
'openai/gpt-3.5-turbo',
|
|
||||||
'openai/gpt-3.5-turbo-16k',
|
|
||||||
'openai/gpt-4-32k',
|
|
||||||
'anthropic/claude-2',
|
|
||||||
'anthropic/claude-instant-v1',
|
|
||||||
'meta-llama/llama-2-13b-chat',
|
|
||||||
'meta-llama/llama-2-70b-chat'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
####### EMBEDDING MODELS ###################
|
####### EMBEDDING MODELS ###################
|
||||||
|
@ -38,28 +41,41 @@ open_ai_embedding_models = [
|
||||||
|
|
||||||
####### COMPLETION ENDPOINTS ################
|
####### COMPLETION ENDPOINTS ################
|
||||||
#############################################
|
#############################################
|
||||||
def completion(model, messages, azure=False):
|
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
|
||||||
|
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
|
||||||
|
try:
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
openai.api_type = "azure"
|
openai.api_type = "azure"
|
||||||
openai.api_base = os.environ.get("AZURE_API_BASE")
|
openai.api_base = os.environ.get("AZURE_API_BASE")
|
||||||
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
||||||
openai.api_key = os.environ.get("AZURE_API_KEY")
|
openai.api_key = os.environ.get("AZURE_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
engine=model,
|
engine=model,
|
||||||
messages = messages
|
messages = messages
|
||||||
)
|
)
|
||||||
elif "replicate" in model:
|
elif "replicate" in model:
|
||||||
|
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
|
||||||
|
# checking in case user set it to REPLICATE_API_KEY instead
|
||||||
|
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
|
||||||
|
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
|
||||||
|
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
input = [{"prompt": prompt}]
|
||||||
|
if max_tokens:
|
||||||
|
input["max_length"] = max_tokens # for t5 models
|
||||||
|
input["max_new_tokens"] = max_tokens # for llama2 models
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
output = replicate.run(
|
output = replicate.run(
|
||||||
model,
|
model,
|
||||||
input={
|
input=input)
|
||||||
"prompt": prompt,
|
|
||||||
})
|
|
||||||
print(f"output: {output}")
|
|
||||||
response = ""
|
response = ""
|
||||||
for item in output:
|
for item in output:
|
||||||
print(f"item: {item}")
|
|
||||||
response += item
|
response += item
|
||||||
new_response = {
|
new_response = {
|
||||||
"choices": [
|
"choices": [
|
||||||
|
@ -73,12 +89,53 @@ def completion(model, messages, azure=False):
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
response = new_response
|
||||||
|
elif model in anthropic_models:
|
||||||
|
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
prompt = f"{HUMAN_PROMPT}"
|
||||||
|
for message in messages:
|
||||||
|
if "role" in message:
|
||||||
|
if message["role"] == "user":
|
||||||
|
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||||
|
else:
|
||||||
|
prompt += f"{AI_PROMPT}{message['content']}"
|
||||||
|
else:
|
||||||
|
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||||
|
prompt += f"{AI_PROMPT}"
|
||||||
|
anthropic = Anthropic()
|
||||||
|
if max_tokens:
|
||||||
|
max_tokens_to_sample = max_tokens
|
||||||
|
else:
|
||||||
|
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
|
completion = anthropic.completions.create(
|
||||||
|
model=model,
|
||||||
|
prompt=prompt,
|
||||||
|
max_tokens_to_sample=max_tokens_to_sample
|
||||||
|
)
|
||||||
|
new_response = {
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": completion.completion,
|
||||||
|
"role": "assistant"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
print(f"new response: {new_response}")
|
print(f"new response: {new_response}")
|
||||||
response = new_response
|
response = new_response
|
||||||
elif model in cohere_models:
|
elif model in cohere_models:
|
||||||
cohere_key = os.environ.get("COHERE_API_KEY")
|
cohere_key = os.environ.get("COHERE_API_KEY")
|
||||||
co = cohere.Client(cohere_key)
|
co = cohere.Client(cohere_key)
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = co.generate(
|
response = co.generate(
|
||||||
model=model,
|
model=model,
|
||||||
prompt = prompt
|
prompt = prompt
|
||||||
|
@ -95,7 +152,6 @@ def completion(model, messages, azure=False):
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
response = new_response
|
response = new_response
|
||||||
|
|
||||||
elif model in open_ai_chat_completion_models:
|
elif model in open_ai_chat_completion_models:
|
||||||
|
@ -103,6 +159,9 @@ def completion(model, messages, azure=False):
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages = messages
|
messages = messages
|
||||||
|
@ -113,47 +172,258 @@ def completion(model, messages, azure=False):
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.Completion.create(
|
response = openai.Completion.create(
|
||||||
model=model,
|
model=model,
|
||||||
prompt = prompt
|
prompt = prompt
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
elif model in openrouter_models:
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
openai.api_base = "https://openrouter.ai/api/v1"
|
|
||||||
openai.api_key = os.environ.get("OPENROUTER_API_KEY")
|
|
||||||
|
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
|
||||||
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
model=model,
|
|
||||||
messages=messages,
|
|
||||||
headers={
|
|
||||||
"HTTP-Referer": os.environ.get("OR_SITE_URL"), # To identify your app
|
|
||||||
"X-Title": os.environ.get("OR_APP_NAME")
|
|
||||||
},
|
|
||||||
)
|
|
||||||
reply = response.choices[0].message
|
|
||||||
return response
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
### EMBEDDING ENDPOINTS ####################
|
### EMBEDDING ENDPOINTS ####################
|
||||||
def embedding(model, input=[], azure=False):
|
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
|
||||||
|
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
|
||||||
|
response = None
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
openai.api_type = "azure"
|
openai.api_type = "azure"
|
||||||
openai.api_base = os.environ.get("AZURE_API_BASE")
|
openai.api_base = os.environ.get("AZURE_API_BASE")
|
||||||
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
||||||
openai.api_key = os.environ.get("AZURE_API_KEY")
|
openai.api_key = os.environ.get("AZURE_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## EMBEDDING CALL
|
||||||
response = openai.Embedding.create(input=input, engine=model)
|
response = openai.Embedding.create(input=input, engine=model)
|
||||||
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
elif model in open_ai_embedding_models:
|
elif model in open_ai_embedding_models:
|
||||||
openai.api_type = "openai"
|
openai.api_type = "openai"
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## EMBEDDING CALL
|
||||||
response = openai.Embedding.create(input=input, model=model)
|
response = openai.Embedding.create(input=input, model=model)
|
||||||
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
|
else:
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
#############################################
|
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
|
||||||
#############################################
|
class litellm_client:
|
||||||
|
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
|
||||||
|
set_verbose = verbose
|
||||||
|
self.success_callback = success_callback
|
||||||
|
self.failure_callback = failure_callback
|
||||||
|
self.logger_fn = None # if user passes in their own logging function
|
||||||
|
self.callback_list = list(set(self.success_callback + self.failure_callback))
|
||||||
|
self.set_callbacks()
|
||||||
|
|
||||||
|
## COMPLETION CALL
|
||||||
|
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
|
||||||
|
try:
|
||||||
|
self.logger_fn = logger_fn
|
||||||
|
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
|
||||||
|
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
|
||||||
|
my_thread.start()
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
args = locals() # get all the param values
|
||||||
|
self.handle_failure(e, args)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
## EMBEDDING CALL
|
||||||
|
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
|
||||||
|
try:
|
||||||
|
self.logger_fn = logger_fn
|
||||||
|
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
|
||||||
|
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
|
||||||
|
my_thread.start()
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
args = locals() # get all the param values
|
||||||
|
self.handle_failure(e, args)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
def set_callbacks(self): #instantiate any external packages
|
||||||
|
for callback in self.callback_list: # only install what's required
|
||||||
|
if callback == "sentry":
|
||||||
|
try:
|
||||||
|
import sentry_sdk
|
||||||
|
except ImportError:
|
||||||
|
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
|
||||||
|
import sentry_sdk
|
||||||
|
self.sentry_sdk = sentry_sdk
|
||||||
|
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
|
||||||
|
self.capture_exception = self.sentry_sdk.capture_exception
|
||||||
|
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
|
||||||
|
elif callback == "posthog":
|
||||||
|
try:
|
||||||
|
from posthog import Posthog
|
||||||
|
except:
|
||||||
|
print_verbose("Package 'posthog' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
|
||||||
|
from posthog import Posthog
|
||||||
|
self.posthog = Posthog(
|
||||||
|
project_api_key=os.environ.get("POSTHOG_API_KEY"),
|
||||||
|
host=os.environ.get("POSTHOG_API_URL"))
|
||||||
|
elif callback == "slack":
|
||||||
|
try:
|
||||||
|
from slack_bolt import App
|
||||||
|
except ImportError:
|
||||||
|
print_verbose("Package 'slack_bolt' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
|
||||||
|
from slack_bolt import App
|
||||||
|
self.slack_app = App(
|
||||||
|
token=os.environ.get("SLACK_API_TOKEN"),
|
||||||
|
signing_secret=os.environ.get("SLACK_API_SECRET")
|
||||||
|
)
|
||||||
|
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
|
||||||
|
|
||||||
|
def handle_input(self, model_call_details={}):
|
||||||
|
if len(model_call_details.keys()) > 0:
|
||||||
|
model = model_call_details["model"] if "model" in model_call_details else None
|
||||||
|
if model:
|
||||||
|
for callback in self.callback_list:
|
||||||
|
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
|
||||||
|
self.add_breadcrumb(
|
||||||
|
category=f'{model}',
|
||||||
|
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
|
||||||
|
level='info',
|
||||||
|
)
|
||||||
|
if self.logger_fn and callable(self.logger_fn):
|
||||||
|
self.logger_fn(model_call_details)
|
||||||
|
pass
|
||||||
|
|
||||||
|
def handle_success(self, model, messages, additional_details):
|
||||||
|
success_handler = additional_details.pop("success_handler", None)
|
||||||
|
failure_handler = additional_details.pop("failure_handler", None)
|
||||||
|
additional_details["litellm_model"] = str(model)
|
||||||
|
additional_details["litellm_messages"] = str(messages)
|
||||||
|
for callback in self.success_callback:
|
||||||
|
try:
|
||||||
|
if callback == "posthog":
|
||||||
|
ph_obj = {}
|
||||||
|
for detail in additional_details:
|
||||||
|
ph_obj[detail] = additional_details[detail]
|
||||||
|
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query"
|
||||||
|
if "user_id" in additional_details:
|
||||||
|
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
self.posthog.capture(event_name, ph_obj)
|
||||||
|
pass
|
||||||
|
elif callback == "slack":
|
||||||
|
slack_msg = ""
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
for detail in additional_details:
|
||||||
|
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
||||||
|
slack_msg += f"Successful call"
|
||||||
|
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if success_handler and callable(success_handler):
|
||||||
|
call_details = {
|
||||||
|
"model": model,
|
||||||
|
"messages": messages,
|
||||||
|
"additional_details": additional_details
|
||||||
|
}
|
||||||
|
success_handler(call_details)
|
||||||
|
pass
|
||||||
|
|
||||||
|
def handle_failure(self, exception, args):
|
||||||
|
args.pop("self")
|
||||||
|
additional_details = args.pop("additional_details", {})
|
||||||
|
|
||||||
|
success_handler = additional_details.pop("success_handler", None)
|
||||||
|
failure_handler = additional_details.pop("failure_handler", None)
|
||||||
|
|
||||||
|
for callback in self.failure_callback:
|
||||||
|
try:
|
||||||
|
if callback == "slack":
|
||||||
|
slack_msg = ""
|
||||||
|
for param in args:
|
||||||
|
slack_msg += f"{param}: {args[param]}\n"
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
for detail in additional_details:
|
||||||
|
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
||||||
|
slack_msg += f"Traceback: {traceback.format_exc()}"
|
||||||
|
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
||||||
|
elif callback == "sentry":
|
||||||
|
self.capture_exception(exception)
|
||||||
|
elif callback == "posthog":
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
ph_obj = {}
|
||||||
|
for param in args:
|
||||||
|
ph_obj[param] += args[param]
|
||||||
|
for detail in additional_details:
|
||||||
|
ph_obj[detail] = additional_details[detail]
|
||||||
|
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
|
||||||
|
if "user_id" in additional_details:
|
||||||
|
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
self.posthog.capture(event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
except:
|
||||||
|
print(f"got an error calling {callback} - {traceback.format_exc()}")
|
||||||
|
|
||||||
|
if failure_handler and callable(failure_handler):
|
||||||
|
call_details = {
|
||||||
|
"exception": exception,
|
||||||
|
"additional_details": additional_details
|
||||||
|
}
|
||||||
|
failure_handler(call_details)
|
||||||
|
pass
|
||||||
|
####### HELPER FUNCTIONS ################
|
||||||
|
|
||||||
|
#Logging function -> log the exact model details + what's being sent | Non-Blocking
|
||||||
|
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
|
||||||
|
try:
|
||||||
|
model_call_details = {}
|
||||||
|
model_call_details["model"] = model
|
||||||
|
model_call_details["input"] = input
|
||||||
|
model_call_details["azure"] = azure
|
||||||
|
model_call_details["additional_args"] = additional_args
|
||||||
|
if logger_fn and callable(logger_fn):
|
||||||
|
try:
|
||||||
|
# log additional call details -> api key, etc.
|
||||||
|
if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models:
|
||||||
|
model_call_details["api_type"] = openai.api_type
|
||||||
|
model_call_details["api_base"] = openai.api_base
|
||||||
|
model_call_details["api_version"] = openai.api_version
|
||||||
|
model_call_details["api_key"] = openai.api_key
|
||||||
|
elif "replicate" in model:
|
||||||
|
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
|
||||||
|
elif model in anthropic_models:
|
||||||
|
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
elif model in cohere_models:
|
||||||
|
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
|
||||||
|
|
||||||
|
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
|
||||||
|
except:
|
||||||
|
print_verbose(f"Basic model call details: {model_call_details}")
|
||||||
|
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
print_verbose(f"Basic model call details: {model_call_details}")
|
||||||
|
pass
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
## Set verbose to true -> ```litellm.verbose = True```
|
||||||
|
def print_verbose(print_statement):
|
||||||
|
if set_verbose:
|
||||||
|
print(f"LiteLLM: {print_statement}")
|
||||||
|
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
|
@ -27,8 +27,3 @@ print(response)
|
||||||
response = completion("command-nightly", messages)
|
response = completion("command-nightly", messages)
|
||||||
print("\nCohere call")
|
print("\nCohere call")
|
||||||
print(response)
|
print(response)
|
||||||
|
|
||||||
# openrouter call
|
|
||||||
response = completion("google/palm-2-codechat-bison", messages)
|
|
||||||
print("\OpenRouter call")
|
|
||||||
print(response)
|
|
BIN
dist/litellm-0.1.0-py3-none-any.whl
vendored
BIN
dist/litellm-0.1.0-py3-none-any.whl
vendored
Binary file not shown.
BIN
dist/litellm-0.1.0.tar.gz
vendored
BIN
dist/litellm-0.1.0.tar.gz
vendored
Binary file not shown.
BIN
dist/litellm-0.1.1-py3-none-any.whl
vendored
BIN
dist/litellm-0.1.1-py3-none-any.whl
vendored
Binary file not shown.
BIN
dist/litellm-0.1.1.tar.gz
vendored
BIN
dist/litellm-0.1.1.tar.gz
vendored
Binary file not shown.
BIN
dist/litellm-0.1.2-py3-none-any.whl
vendored
Normal file
BIN
dist/litellm-0.1.2-py3-none-any.whl
vendored
Normal file
Binary file not shown.
BIN
dist/litellm-0.1.2.tar.gz
vendored
Normal file
BIN
dist/litellm-0.1.2.tar.gz
vendored
Normal file
Binary file not shown.
|
@ -1,12 +1,6 @@
|
||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: litellm
|
Name: litellm
|
||||||
Version: 0.1.1
|
Version: 0.1.2
|
||||||
Summary: Library to easily interface with LLM API providers
|
Summary: Library to easily interface with LLM API providers
|
||||||
Home-page: UNKNOWN
|
Author: BerriAI
|
||||||
Author: Ishaan Jaffer
|
|
||||||
License: UNKNOWN
|
|
||||||
Platform: UNKNOWN
|
|
||||||
License-File: LICENSE
|
License-File: LICENSE
|
||||||
|
|
||||||
UNKNOWN
|
|
||||||
|
|
||||||
|
|
BIN
litellm/.DS_Store
vendored
Normal file
BIN
litellm/.DS_Store
vendored
Normal file
Binary file not shown.
Binary file not shown.
347
litellm/main.py
347
litellm/main.py
|
@ -1,7 +1,17 @@
|
||||||
import os, openai, cohere, dotenv
|
import os, openai, cohere, replicate, sys
|
||||||
|
from typing import Any
|
||||||
|
from func_timeout import func_set_timeout, FunctionTimedOut
|
||||||
|
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||||
|
import json
|
||||||
|
import traceback
|
||||||
|
import threading
|
||||||
|
import dotenv
|
||||||
|
import traceback
|
||||||
|
import subprocess
|
||||||
|
####### ENVIRONMENT VARIABLES ###################
|
||||||
# Loading env variables using dotenv
|
# Loading env variables using dotenv
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
set_verbose = False
|
||||||
|
|
||||||
####### COMPLETION MODELS ###################
|
####### COMPLETION MODELS ###################
|
||||||
open_ai_chat_completion_models = [
|
open_ai_chat_completion_models = [
|
||||||
|
@ -16,16 +26,9 @@ cohere_models = [
|
||||||
'command-nightly',
|
'command-nightly',
|
||||||
]
|
]
|
||||||
|
|
||||||
openrouter_models = [
|
anthropic_models = [
|
||||||
'google/palm-2-codechat-bison',
|
"claude-2",
|
||||||
'google/palm-2-chat-bison',
|
"claude-instant-1"
|
||||||
'openai/gpt-3.5-turbo',
|
|
||||||
'openai/gpt-3.5-turbo-16k',
|
|
||||||
'openai/gpt-4-32k',
|
|
||||||
'anthropic/claude-2',
|
|
||||||
'anthropic/claude-instant-v1',
|
|
||||||
'meta-llama/llama-2-13b-chat',
|
|
||||||
'meta-llama/llama-2-70b-chat'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
####### EMBEDDING MODELS ###################
|
####### EMBEDDING MODELS ###################
|
||||||
|
@ -38,28 +41,41 @@ open_ai_embedding_models = [
|
||||||
|
|
||||||
####### COMPLETION ENDPOINTS ################
|
####### COMPLETION ENDPOINTS ################
|
||||||
#############################################
|
#############################################
|
||||||
def completion(model, messages, azure=False):
|
@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure)
|
||||||
|
def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None):
|
||||||
|
try:
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
openai.api_type = "azure"
|
openai.api_type = "azure"
|
||||||
openai.api_base = os.environ.get("AZURE_API_BASE")
|
openai.api_base = os.environ.get("AZURE_API_BASE")
|
||||||
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
||||||
openai.api_key = os.environ.get("AZURE_API_KEY")
|
openai.api_key = os.environ.get("AZURE_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
engine=model,
|
engine=model,
|
||||||
messages = messages
|
messages = messages
|
||||||
)
|
)
|
||||||
elif "replicate" in model:
|
elif "replicate" in model:
|
||||||
|
# replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
|
||||||
|
# checking in case user set it to REPLICATE_API_KEY instead
|
||||||
|
if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
|
||||||
|
replicate_api_token = os.environ.get("REPLICATE_API_KEY")
|
||||||
|
os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
input = [{"prompt": prompt}]
|
||||||
|
if max_tokens:
|
||||||
|
input["max_length"] = max_tokens # for t5 models
|
||||||
|
input["max_new_tokens"] = max_tokens # for llama2 models
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
output = replicate.run(
|
output = replicate.run(
|
||||||
model,
|
model,
|
||||||
input={
|
input=input)
|
||||||
"prompt": prompt,
|
|
||||||
})
|
|
||||||
print(f"output: {output}")
|
|
||||||
response = ""
|
response = ""
|
||||||
for item in output:
|
for item in output:
|
||||||
print(f"item: {item}")
|
|
||||||
response += item
|
response += item
|
||||||
new_response = {
|
new_response = {
|
||||||
"choices": [
|
"choices": [
|
||||||
|
@ -73,12 +89,53 @@ def completion(model, messages, azure=False):
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
response = new_response
|
||||||
|
elif model in anthropic_models:
|
||||||
|
#anthropic defaults to os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
prompt = f"{HUMAN_PROMPT}"
|
||||||
|
for message in messages:
|
||||||
|
if "role" in message:
|
||||||
|
if message["role"] == "user":
|
||||||
|
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||||
|
else:
|
||||||
|
prompt += f"{AI_PROMPT}{message['content']}"
|
||||||
|
else:
|
||||||
|
prompt += f"{HUMAN_PROMPT}{message['content']}"
|
||||||
|
prompt += f"{AI_PROMPT}"
|
||||||
|
anthropic = Anthropic()
|
||||||
|
if max_tokens:
|
||||||
|
max_tokens_to_sample = max_tokens
|
||||||
|
else:
|
||||||
|
max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
|
completion = anthropic.completions.create(
|
||||||
|
model=model,
|
||||||
|
prompt=prompt,
|
||||||
|
max_tokens_to_sample=max_tokens_to_sample
|
||||||
|
)
|
||||||
|
new_response = {
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": completion.completion,
|
||||||
|
"role": "assistant"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
print(f"new response: {new_response}")
|
print(f"new response: {new_response}")
|
||||||
response = new_response
|
response = new_response
|
||||||
elif model in cohere_models:
|
elif model in cohere_models:
|
||||||
cohere_key = os.environ.get("COHERE_API_KEY")
|
cohere_key = os.environ.get("COHERE_API_KEY")
|
||||||
co = cohere.Client(cohere_key)
|
co = cohere.Client(cohere_key)
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = co.generate(
|
response = co.generate(
|
||||||
model=model,
|
model=model,
|
||||||
prompt = prompt
|
prompt = prompt
|
||||||
|
@ -95,7 +152,6 @@ def completion(model, messages, azure=False):
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
response = new_response
|
response = new_response
|
||||||
|
|
||||||
elif model in open_ai_chat_completion_models:
|
elif model in open_ai_chat_completion_models:
|
||||||
|
@ -103,6 +159,9 @@ def completion(model, messages, azure=False):
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages = messages
|
messages = messages
|
||||||
|
@ -113,48 +172,258 @@ def completion(model, messages, azure=False):
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
prompt = " ".join([message["content"] for message in messages])
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||||
|
## COMPLETION CALL
|
||||||
response = openai.Completion.create(
|
response = openai.Completion.create(
|
||||||
model=model,
|
model=model,
|
||||||
prompt = prompt
|
prompt = prompt
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
elif model in openrouter_models:
|
logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
|
||||||
openai.api_base = "https://openrouter.ai/api/v1"
|
|
||||||
openai.api_key = os.environ.get("OPENROUTER_API_KEY")
|
|
||||||
|
|
||||||
prompt = " ".join([message["content"] for message in messages])
|
|
||||||
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
model=model,
|
|
||||||
messages=messages,
|
|
||||||
headers={
|
|
||||||
"HTTP-Referer": os.environ.get("OR_SITE_URL"), # To identify your app
|
|
||||||
"X-Title": os.environ.get("OR_APP_NAME")
|
|
||||||
},
|
|
||||||
)
|
|
||||||
reply = response.choices[0].message
|
|
||||||
return response
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
### EMBEDDING ENDPOINTS ####################
|
### EMBEDDING ENDPOINTS ####################
|
||||||
def embedding(model, input=[], azure=False):
|
@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/
|
||||||
|
def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None):
|
||||||
|
response = None
|
||||||
if azure == True:
|
if azure == True:
|
||||||
# azure configs
|
# azure configs
|
||||||
openai.api_type = "azure"
|
openai.api_type = "azure"
|
||||||
openai.api_base = os.environ.get("AZURE_API_BASE")
|
openai.api_base = os.environ.get("AZURE_API_BASE")
|
||||||
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
openai.api_version = os.environ.get("AZURE_API_VERSION")
|
||||||
openai.api_key = os.environ.get("AZURE_API_KEY")
|
openai.api_key = os.environ.get("AZURE_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## EMBEDDING CALL
|
||||||
response = openai.Embedding.create(input=input, engine=model)
|
response = openai.Embedding.create(input=input, engine=model)
|
||||||
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
elif model in open_ai_embedding_models:
|
elif model in open_ai_embedding_models:
|
||||||
openai.api_type = "openai"
|
openai.api_type = "openai"
|
||||||
openai.api_base = "https://api.openai.com/v1"
|
openai.api_base = "https://api.openai.com/v1"
|
||||||
openai.api_version = None
|
openai.api_version = None
|
||||||
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
## LOGGING
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
## EMBEDDING CALL
|
||||||
response = openai.Embedding.create(input=input, model=model)
|
response = openai.Embedding.create(input=input, model=model)
|
||||||
|
print_verbose(f"response_value: {str(response)[:50]}")
|
||||||
|
else:
|
||||||
|
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
#############################################
|
### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc.
|
||||||
#############################################
|
class litellm_client:
|
||||||
|
def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor
|
||||||
|
set_verbose = verbose
|
||||||
|
self.success_callback = success_callback
|
||||||
|
self.failure_callback = failure_callback
|
||||||
|
self.logger_fn = None # if user passes in their own logging function
|
||||||
|
self.callback_list = list(set(self.success_callback + self.failure_callback))
|
||||||
|
self.set_callbacks()
|
||||||
|
|
||||||
|
## COMPLETION CALL
|
||||||
|
def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any:
|
||||||
|
try:
|
||||||
|
self.logger_fn = logger_fn
|
||||||
|
response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input)
|
||||||
|
my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread
|
||||||
|
my_thread.start()
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
args = locals() # get all the param values
|
||||||
|
self.handle_failure(e, args)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
## EMBEDDING CALL
|
||||||
|
def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any:
|
||||||
|
try:
|
||||||
|
self.logger_fn = logger_fn
|
||||||
|
response = embedding(model, input, azure=azure, logger_fn=self.handle_input)
|
||||||
|
my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread
|
||||||
|
my_thread.start()
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
args = locals() # get all the param values
|
||||||
|
self.handle_failure(e, args)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
def set_callbacks(self): #instantiate any external packages
|
||||||
|
for callback in self.callback_list: # only install what's required
|
||||||
|
if callback == "sentry":
|
||||||
|
try:
|
||||||
|
import sentry_sdk
|
||||||
|
except ImportError:
|
||||||
|
print_verbose("Package 'sentry_sdk' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
|
||||||
|
import sentry_sdk
|
||||||
|
self.sentry_sdk = sentry_sdk
|
||||||
|
self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
|
||||||
|
self.capture_exception = self.sentry_sdk.capture_exception
|
||||||
|
self.add_breadcrumb = self.sentry_sdk.add_breadcrumb
|
||||||
|
elif callback == "posthog":
|
||||||
|
try:
|
||||||
|
from posthog import Posthog
|
||||||
|
except:
|
||||||
|
print_verbose("Package 'posthog' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog'])
|
||||||
|
from posthog import Posthog
|
||||||
|
self.posthog = Posthog(
|
||||||
|
project_api_key=os.environ.get("POSTHOG_API_KEY"),
|
||||||
|
host=os.environ.get("POSTHOG_API_URL"))
|
||||||
|
elif callback == "slack":
|
||||||
|
try:
|
||||||
|
from slack_bolt import App
|
||||||
|
except ImportError:
|
||||||
|
print_verbose("Package 'slack_bolt' is missing. Installing it...")
|
||||||
|
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt'])
|
||||||
|
from slack_bolt import App
|
||||||
|
self.slack_app = App(
|
||||||
|
token=os.environ.get("SLACK_API_TOKEN"),
|
||||||
|
signing_secret=os.environ.get("SLACK_API_SECRET")
|
||||||
|
)
|
||||||
|
self.alerts_channel = os.environ["SLACK_API_CHANNEL"]
|
||||||
|
|
||||||
|
def handle_input(self, model_call_details={}):
|
||||||
|
if len(model_call_details.keys()) > 0:
|
||||||
|
model = model_call_details["model"] if "model" in model_call_details else None
|
||||||
|
if model:
|
||||||
|
for callback in self.callback_list:
|
||||||
|
if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration
|
||||||
|
self.add_breadcrumb(
|
||||||
|
category=f'{model}',
|
||||||
|
message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)),
|
||||||
|
level='info',
|
||||||
|
)
|
||||||
|
if self.logger_fn and callable(self.logger_fn):
|
||||||
|
self.logger_fn(model_call_details)
|
||||||
|
pass
|
||||||
|
|
||||||
|
def handle_success(self, model, messages, additional_details):
|
||||||
|
success_handler = additional_details.pop("success_handler", None)
|
||||||
|
failure_handler = additional_details.pop("failure_handler", None)
|
||||||
|
additional_details["litellm_model"] = str(model)
|
||||||
|
additional_details["litellm_messages"] = str(messages)
|
||||||
|
for callback in self.success_callback:
|
||||||
|
try:
|
||||||
|
if callback == "posthog":
|
||||||
|
ph_obj = {}
|
||||||
|
for detail in additional_details:
|
||||||
|
ph_obj[detail] = additional_details[detail]
|
||||||
|
event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query"
|
||||||
|
if "user_id" in additional_details:
|
||||||
|
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
self.posthog.capture(event_name, ph_obj)
|
||||||
|
pass
|
||||||
|
elif callback == "slack":
|
||||||
|
slack_msg = ""
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
for detail in additional_details:
|
||||||
|
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
||||||
|
slack_msg += f"Successful call"
|
||||||
|
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if success_handler and callable(success_handler):
|
||||||
|
call_details = {
|
||||||
|
"model": model,
|
||||||
|
"messages": messages,
|
||||||
|
"additional_details": additional_details
|
||||||
|
}
|
||||||
|
success_handler(call_details)
|
||||||
|
pass
|
||||||
|
|
||||||
|
def handle_failure(self, exception, args):
|
||||||
|
args.pop("self")
|
||||||
|
additional_details = args.pop("additional_details", {})
|
||||||
|
|
||||||
|
success_handler = additional_details.pop("success_handler", None)
|
||||||
|
failure_handler = additional_details.pop("failure_handler", None)
|
||||||
|
|
||||||
|
for callback in self.failure_callback:
|
||||||
|
try:
|
||||||
|
if callback == "slack":
|
||||||
|
slack_msg = ""
|
||||||
|
for param in args:
|
||||||
|
slack_msg += f"{param}: {args[param]}\n"
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
for detail in additional_details:
|
||||||
|
slack_msg += f"{detail}: {additional_details[detail]}\n"
|
||||||
|
slack_msg += f"Traceback: {traceback.format_exc()}"
|
||||||
|
self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg)
|
||||||
|
elif callback == "sentry":
|
||||||
|
self.capture_exception(exception)
|
||||||
|
elif callback == "posthog":
|
||||||
|
if len(additional_details.keys()) > 0:
|
||||||
|
ph_obj = {}
|
||||||
|
for param in args:
|
||||||
|
ph_obj[param] += args[param]
|
||||||
|
for detail in additional_details:
|
||||||
|
ph_obj[detail] = additional_details[detail]
|
||||||
|
event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query"
|
||||||
|
if "user_id" in additional_details:
|
||||||
|
self.posthog.capture(additional_details["user_id"], event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
self.posthog.capture(event_name, ph_obj)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
except:
|
||||||
|
print(f"got an error calling {callback} - {traceback.format_exc()}")
|
||||||
|
|
||||||
|
if failure_handler and callable(failure_handler):
|
||||||
|
call_details = {
|
||||||
|
"exception": exception,
|
||||||
|
"additional_details": additional_details
|
||||||
|
}
|
||||||
|
failure_handler(call_details)
|
||||||
|
pass
|
||||||
|
####### HELPER FUNCTIONS ################
|
||||||
|
|
||||||
|
#Logging function -> log the exact model details + what's being sent | Non-Blocking
|
||||||
|
def logging(model, input, azure=False, additional_args={}, logger_fn=None):
|
||||||
|
try:
|
||||||
|
model_call_details = {}
|
||||||
|
model_call_details["model"] = model
|
||||||
|
model_call_details["input"] = input
|
||||||
|
model_call_details["azure"] = azure
|
||||||
|
model_call_details["additional_args"] = additional_args
|
||||||
|
if logger_fn and callable(logger_fn):
|
||||||
|
try:
|
||||||
|
# log additional call details -> api key, etc.
|
||||||
|
if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models:
|
||||||
|
model_call_details["api_type"] = openai.api_type
|
||||||
|
model_call_details["api_base"] = openai.api_base
|
||||||
|
model_call_details["api_version"] = openai.api_version
|
||||||
|
model_call_details["api_key"] = openai.api_key
|
||||||
|
elif "replicate" in model:
|
||||||
|
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
|
||||||
|
elif model in anthropic_models:
|
||||||
|
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
elif model in cohere_models:
|
||||||
|
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
|
||||||
|
|
||||||
|
logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
|
||||||
|
except:
|
||||||
|
print_verbose(f"Basic model call details: {model_call_details}")
|
||||||
|
print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
print_verbose(f"Basic model call details: {model_call_details}")
|
||||||
|
pass
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
## Set verbose to true -> ```litellm.verbose = True```
|
||||||
|
def print_verbose(print_statement):
|
||||||
|
if set_verbose:
|
||||||
|
print(f"LiteLLM: {print_statement}")
|
||||||
|
print("Get help - https://discord.com/invite/wuPM9dRgDw")
|
20
litellm/tests/test_bad_params.py
Normal file
20
litellm/tests/test_bad_params.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
import sys, os
|
||||||
|
import traceback
|
||||||
|
sys.path.append('..') # Adds the parent directory to the system path
|
||||||
|
import main
|
||||||
|
from main import litellm_client
|
||||||
|
client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True)
|
||||||
|
completion = client.completion
|
||||||
|
embedding = client.embedding
|
||||||
|
|
||||||
|
main.set_verbose = True
|
||||||
|
|
||||||
|
user_message = "Hello, how are you?"
|
||||||
|
messages = [{ "content": user_message,"role": "user"}]
|
||||||
|
model_val = None
|
||||||
|
# test on empty
|
||||||
|
try:
|
||||||
|
response = completion(model=model_val, messages=messages)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
59
litellm/tests/test_client.py
Normal file
59
litellm/tests/test_client.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
import sys, os
|
||||||
|
import traceback
|
||||||
|
sys.path.append('..') # Adds the parent directory to the system path
|
||||||
|
import main
|
||||||
|
from main import litellm_client
|
||||||
|
client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True)
|
||||||
|
completion = client.completion
|
||||||
|
embedding = client.embedding
|
||||||
|
|
||||||
|
main.set_verbose = True
|
||||||
|
|
||||||
|
def logger_fn(model_call_object: dict):
|
||||||
|
print(f"model call details: {model_call_object}")
|
||||||
|
|
||||||
|
user_message = "Hello, how are you?"
|
||||||
|
messages = [{ "content": user_message,"role": "user"}]
|
||||||
|
|
||||||
|
# test on openai completion call
|
||||||
|
try:
|
||||||
|
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# test on openai completion call
|
||||||
|
try:
|
||||||
|
response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# test on non-openai completion call
|
||||||
|
try:
|
||||||
|
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# test on openai embedding call
|
||||||
|
try:
|
||||||
|
response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
# test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
|
||||||
|
try:
|
||||||
|
response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
# test on good azure openai embedding call
|
||||||
|
try:
|
||||||
|
response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
48
litellm/tests/test_logging.py
Normal file
48
litellm/tests/test_logging.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import sys, os
|
||||||
|
import traceback
|
||||||
|
sys.path.append('..') # Adds the parent directory to the system path
|
||||||
|
import main
|
||||||
|
from main import completion, embedding
|
||||||
|
|
||||||
|
main.verbose = True ## Replace to: ```litellm.verbose = True``` when using pypi package
|
||||||
|
|
||||||
|
def logger_fn(model_call_object: dict):
|
||||||
|
print(f"model call details: {model_call_object}")
|
||||||
|
|
||||||
|
user_message = "Hello, how are you?"
|
||||||
|
messages = [{ "content": user_message,"role": "user"}]
|
||||||
|
|
||||||
|
# test on openai completion call
|
||||||
|
try:
|
||||||
|
response = completion(model="gpt-3.5-turbo", messages=messages)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# test on non-openai completion call
|
||||||
|
try:
|
||||||
|
response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
|
||||||
|
except:
|
||||||
|
print(f"error occurred: {traceback.format_exc()}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# test on openai embedding call
|
||||||
|
try:
|
||||||
|
response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
# test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
|
||||||
|
try:
|
||||||
|
response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
# test on good azure openai embedding call
|
||||||
|
try:
|
||||||
|
response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
|
||||||
|
print(f"response: {str(response)[:50]}")
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
4
setup.py
4
setup.py
|
@ -2,9 +2,9 @@ from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='litellm',
|
name='litellm',
|
||||||
version='0.1.202',
|
version='0.1.2',
|
||||||
description='Library to easily interface with LLM API providers',
|
description='Library to easily interface with LLM API providers',
|
||||||
author='Ishaan Jaffer',
|
author='BerriAI',
|
||||||
packages=[
|
packages=[
|
||||||
'litellm'
|
'litellm'
|
||||||
],
|
],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue