new logger client

Krrish Dholakia 2023-08-28 14:56:20 -07:00
parent d48763a92f
commit a0f882d507
9 changed files with 235 additions and 195 deletions
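
A minimal sketch of the new per-call logging lifecycle this commit introduces: the @client wrapper builds one Logging object per request in function_setup and threads it through completion()/embedding() as litellm_logging_obj. The names (CallTypes, Logging, update_environment_variables, success_handler, failure_handler) follow the diff below, but the bodies here are simplified stand-ins, not the real implementation in litellm/utils.py.

    import datetime
    import uuid
    from enum import Enum

    class CallTypes(Enum):
        embedding = "embedding"
        completion = "completion"

    class Logging:
        # simplified stand-in for litellm.utils.Logging
        def __init__(self, model, messages, stream, call_type, litellm_call_id):
            if call_type not in [item.value for item in CallTypes]:
                raise ValueError(f"Invalid call_type {call_type}")
            self.model = model
            self.messages = messages
            self.stream = stream
            self.call_type = call_type
            self.litellm_call_id = litellm_call_id

        def update_environment_variables(self, optional_params, litellm_params):
            # completion()/embedding() call this once optional/litellm params are known
            self.optional_params = optional_params
            self.litellm_params = litellm_params

        def success_handler(self, result, start_time, end_time):
            print(f"{self.call_type} succeeded in {(end_time - start_time).total_seconds()}s")

        def failure_handler(self, exception, traceback_exception, start_time, end_time):
            print(f"{self.call_type} failed: {exception}")

    # the wrapper builds one Logging object per call and passes it down via
    # kwargs["litellm_logging_obj"]; the handlers run around the model call
    logging_obj = Logging(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi"}],
        stream=False,
        call_type=CallTypes.completion.value,
        litellm_call_id=str(uuid.uuid4()),
    )
    logging_obj.update_environment_variables(optional_params={}, litellm_params={"logger_fn": None})
    start = datetime.datetime.now()
    logging_obj.success_handler(result={"choices": []}, start_time=start, end_time=datetime.datetime.now())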

View file

@@ -1,5 +1,5 @@
 import requests, traceback, json, os
+import types
 class LiteDebugger:
     user_email = None
@@ -7,13 +7,12 @@ class LiteDebugger:
     def __init__(self, email=None):
         self.api_url = "https://api.litellm.ai/debugger"
-        # self.api_url = "http://0.0.0.0:4000/debugger"
         self.validate_environment(email)
         pass
     def validate_environment(self, email):
         try:
-            self.user_email = os.getenv("LITELLM_EMAIL") or email
+            self.user_email = (email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL"))
             self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
             try:
                 print(
@@ -23,11 +22,11 @@ class LiteDebugger:
             print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
             if self.user_email == None:
                 raise Exception(
-                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>"
+                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
                 )
         except Exception as e:
             raise ValueError(
-                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>"
+                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
             )
     def input_log_event(
@@ -36,6 +35,7 @@ class LiteDebugger:
         messages,
         end_user,
         litellm_call_id,
+        call_type,
         print_verbose,
         litellm_params,
         optional_params,
@@ -52,39 +52,76 @@ class LiteDebugger:
             updated_litellm_params = remove_key_value(litellm_params, "logger_fn")
-            litellm_data_obj = {
-                "model": model,
-                "messages": messages,
-                "end_user": end_user,
-                "status": "initiated",
-                "litellm_call_id": litellm_call_id,
-                "user_email": self.user_email,
-                "litellm_params": updated_litellm_params,
-                "optional_params": optional_params,
-            }
-            print_verbose(
-                f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
-            )
-            response = requests.post(
-                url=self.api_url,
-                headers={"content-type": "application/json"},
-                data=json.dumps(litellm_data_obj),
-            )
-            print_verbose(f"LiteDebugger: api response - {response.text}")
+            if call_type == "embedding":
+                for message in messages: # assuming the input is a list as required by the embedding function
+                    litellm_data_obj = {
+                        "model": model,
+                        "messages": [{"role": "user", "content": message}],
+                        "end_user": end_user,
+                        "status": "initiated",
+                        "litellm_call_id": litellm_call_id,
+                        "user_email": self.user_email,
+                        "litellm_params": updated_litellm_params,
+                        "optional_params": optional_params,
+                    }
+                    print_verbose(
+                        f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
+                    )
+                    response = requests.post(
+                        url=self.api_url,
+                        headers={"content-type": "application/json"},
+                        data=json.dumps(litellm_data_obj),
+                    )
+                    print_verbose(f"LiteDebugger: embedding api response - {response.text}")
+            elif call_type == "completion":
+                litellm_data_obj = {
+                    "model": model,
+                    "messages": messages if isinstance(messages, list) else [{"role": "user", "content": messages}],
+                    "end_user": end_user,
+                    "status": "initiated",
+                    "litellm_call_id": litellm_call_id,
+                    "user_email": self.user_email,
+                    "litellm_params": updated_litellm_params,
+                    "optional_params": optional_params,
+                }
+                print_verbose(
+                    f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
+                )
+                response = requests.post(
+                    url=self.api_url,
+                    headers={"content-type": "application/json"},
+                    data=json.dumps(litellm_data_obj),
+                )
+                print_verbose(f"LiteDebugger: completion api response - {response.text}")
         except:
             print_verbose(
                 f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
             )
             pass
-    def post_call_log_event(self, original_response, litellm_call_id, print_verbose):
+    def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream):
         try:
-            litellm_data_obj = {
-                "status": "received",
-                "additional_details": {"original_response": original_response},
-                "litellm_call_id": litellm_call_id,
-                "user_email": self.user_email,
-            }
+            if call_type == "embedding":
+                litellm_data_obj = {
+                    "status": "received",
+                    "additional_details": {"original_response": str(original_response["data"][0]["embedding"][:5])}, # don't store the entire vector
+                    "litellm_call_id": litellm_call_id,
+                    "user_email": self.user_email,
+                }
+            elif call_type == "completion" and not stream:
+                litellm_data_obj = {
+                    "status": "received",
+                    "additional_details": {"original_response": original_response},
+                    "litellm_call_id": litellm_call_id,
+                    "user_email": self.user_email,
+                }
+            elif call_type == "completion" and stream:
+                litellm_data_obj = {
+                    "status": "received",
+                    "additional_details": {"original_response": "Streamed response" if isinstance(original_response, types.GeneratorType) else original_response},
+                    "litellm_call_id": litellm_call_id,
+                    "user_email": self.user_email,
+                }
             response = requests.post(
                 url=self.api_url,
                 headers={"content-type": "application/json"},
@@ -98,32 +135,28 @@ class LiteDebugger:
     def log_event(
         self,
-        model,
-        messages,
         end_user,
         response_obj,
         start_time,
         end_time,
         litellm_call_id,
         print_verbose,
+        call_type,
+        stream = False
     ):
         try:
             print_verbose(
-                f"LiteLLMDebugger: Logging - Enters handler logging function for model {model} with response object {response_obj}"
+                f"LiteLLMDebugger: Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
             )
             total_cost = 0 # [TODO] implement cost tracking
             response_time = (end_time - start_time).total_seconds()
-            if "choices" in response_obj:
+            if call_type == "completion" and stream == False:
                 litellm_data_obj = {
                     "response_time": response_time,
-                    "model": response_obj["model"],
                     "total_cost": total_cost,
-                    "messages": messages,
-                    "response": response["choices"][0]["message"]["content"],
-                    "end_user": end_user,
+                    "response": response_obj["choices"][0]["message"]["content"],
                     "litellm_call_id": litellm_call_id,
                     "status": "success",
-                    "user_email": self.user_email,
                 }
                 print_verbose(
                     f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
@@ -133,45 +166,26 @@ class LiteDebugger:
                     headers={"content-type": "application/json"},
                     data=json.dumps(litellm_data_obj),
                 )
-            elif (
-                "data" in response_obj
-                and isinstance(response_obj["data"], list)
-                and len(response_obj["data"]) > 0
-                and "embedding" in response_obj["data"][0]
-            ):
-                print(f"messages: {messages}")
+            elif call_type == "embedding":
                 litellm_data_obj = {
                     "response_time": response_time,
-                    "model": response_obj["model"],
                     "total_cost": total_cost,
-                    "messages": messages,
                     "response": str(response_obj["data"][0]["embedding"][:5]),
-                    "end_user": end_user,
                     "litellm_call_id": litellm_call_id,
                     "status": "success",
-                    "user_email": self.user_email,
                 }
-                print_verbose(
-                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
-                )
                 response = requests.post(
                     url=self.api_url,
                     headers={"content-type": "application/json"},
                     data=json.dumps(litellm_data_obj),
                 )
-            elif (
-                isinstance(response_obj, object)
-                and response_obj.__class__.__name__ == "CustomStreamWrapper"
-            ):
+            elif call_type == "completion" and stream == True:
                 litellm_data_obj = {
                     "response_time": response_time,
                     "total_cost": total_cost,
-                    "messages": messages,
-                    "response": "Streamed response",
-                    "end_user": end_user,
+                    "response": "streamed response",
                     "litellm_call_id": litellm_call_id,
                     "status": "success",
-                    "user_email": self.user_email,
                 }
                 print_verbose(
                     f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
@@ -188,7 +202,6 @@ class LiteDebugger:
                     "response_time": response_time,
                     "model": response_obj["model"],
                     "total_cost": total_cost,
-                    "messages": messages,
                     "error": response_obj["error"],
                     "end_user": end_user,
                     "litellm_call_id": litellm_call_id,

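A small illustrative sketch of the identity resolution the debugger client now performs in validate_environment (precedence per the diff above: explicit email argument, then LITELLM_TOKEN, then LITELLM_EMAIL). The helper name resolve_debugger_identity is invented for this example.

    import os

    def resolve_debugger_identity(email=None):
        # mirrors the new precedence: explicit arg, then LITELLM_TOKEN, then LITELLM_EMAIL
        user_email = email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL")
        if user_email is None:
            raise ValueError(
                "Missing LITELLM_TOKEN. Set it in your environment, e.g. os.environ['LITELLM_TOKEN'] = '<your_token>'"
            )
        return user_email, "https://admin.litellm.ai/" + user_email

    # os.environ["LITELLM_TOKEN"] = "my-token"
    # user, dashboard_url = resolve_debugger_identity()
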
View file

@@ -31,7 +31,7 @@ class AI21LLM:
         # set the api key
         if self.api_key == None:
             raise ValueError(
-                "Missing Baseten API Key - A call is being made to baseten but no key is set either in the environment variables or via params"
+                "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
             )
         self.api_key = api_key
         self.headers = {

View file

@@ -92,6 +92,7 @@ def completion(
     custom_llm_provider=None,
     custom_api_base=None,
     litellm_call_id=None,
+    litellm_logging_obj=None,
     # model specific optional params
     # used by text-bison only
     top_k=40,
@@ -100,6 +101,7 @@ def completion(
 ) -> ModelResponse:
     args = locals()
     try:
+        logging = litellm_logging_obj
         if fallbacks != []:
             return completion_with_fallbacks(**args)
         if litellm.model_alias_map and model in litellm.model_alias_map:
@@ -151,12 +153,7 @@ def completion(
             litellm_call_id=litellm_call_id,
             model_alias_map=litellm.model_alias_map,
         )
-        logging = Logging(
-            model=model,
-            messages=messages,
-            optional_params=optional_params,
-            litellm_params=litellm_params,
-        )
+        logging.update_environment_variables(optional_params=optional_params, litellm_params=litellm_params)
         if custom_llm_provider == "azure":
             # azure configs
             openai.api_type = "azure"
@@ -306,7 +303,7 @@ def completion(
             response = openai.Completion.create(model=model, prompt=prompt, **optional_params)
             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model)
+                response = CustomStreamWrapper(response, model, logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -363,7 +360,7 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 # let the stream handler know this is replicate
-                response = CustomStreamWrapper(output, "replicate")
+                response = CustomStreamWrapper(output, "replicate", logging_obj=logging)
                 return response
             response = ""
             for item in output:
@@ -413,7 +410,7 @@ def completion(
             )
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(model_response, model)
+                response = CustomStreamWrapper(model_response, model, logging_obj=logging)
                 return response
             response = model_response
         elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
@@ -486,7 +483,7 @@ def completion(
             response = co.generate(model=model, prompt=prompt, **optional_params)
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(response, model)
+                response = CustomStreamWrapper(response, model, logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -532,7 +529,7 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(
-                    model_response, model, custom_llm_provider="huggingface"
+                    model_response, model, custom_llm_provider="huggingface", logging_obj=logging
                 )
                 return response
             response = model_response
@@ -572,7 +569,7 @@ def completion(
                     headers=headers,
                 )
                 response = CustomStreamWrapper(
-                    res.iter_lines(), model, custom_llm_provider="together_ai"
+                    res.iter_lines(), model, custom_llm_provider="together_ai", logging_obj=logging
                 )
                 return response
             else:
@@ -689,7 +686,7 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(
-                    model_response, model, custom_llm_provider="ai21"
+                    model_response, model, custom_llm_provider="ai21", logging_obj=logging
                 )
                 return response
@@ -732,7 +729,7 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(
-                    model_response, model, custom_llm_provider="baseten"
+                    model_response, model, custom_llm_provider="baseten", logging_obj=logging
                 )
                 return response
             response = model_response
@@ -775,8 +772,6 @@ def completion(
         )
         return response
     except Exception as e:
-        ## LOGGING
-        logging.post_call(input=messages, api_key=api_key, original_response=e)
         ## Map to OpenAI Exception
         raise exception_type(
             model=model, custom_llm_provider=custom_llm_provider, original_exception=e
@@ -816,21 +811,12 @@ def batch_completion(*args, **kwargs):
     60
 ) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def embedding(
-    model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None
+    model, input=[], azure=False, force_timeout=60, litellm_call_id=None, litellm_logging_obj=None, logger_fn=None
 ):
     try:
         response = None
-        logging = Logging(
-            model=model,
-            messages=input,
-            optional_params={},
-            litellm_params={
-                "azure": azure,
-                "force_timeout": force_timeout,
-                "logger_fn": logger_fn,
-                "litellm_call_id": litellm_call_id,
-            },
-        )
+        logging = litellm_logging_obj
+        logging.update_environment_variables(optional_params={}, litellm_params={"force_timeout": force_timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn})
         if azure == True:
             # azure configs
             openai.api_type = "azure"
@@ -849,7 +835,6 @@ def embedding(
             )
             ## EMBEDDING CALL
             response = openai.Embedding.create(input=input, engine=model)
-            print_verbose(f"response_value: {str(response)[:100]}")
         elif model in litellm.open_ai_embedding_models:
             openai.api_type = "openai"
             openai.api_base = "https://api.openai.com/v1"
@@ -867,15 +852,13 @@ def embedding(
             )
             ## EMBEDDING CALL
             response = openai.Embedding.create(input=input, model=model)
-            print_verbose(f"response_value: {str(response)[:100]}")
         else:
             args = locals()
             raise ValueError(f"No valid embedding model args passed in - {args}")
+        ## LOGGING
+        logging.post_call(input=input, api_key=openai.api_key, original_response=response)
         return response
     except Exception as e:
-        ## LOGGING
-        logging.post_call(input=input, api_key=openai.api_key, original_response=e)
         ## Map to OpenAI Exception
         raise exception_type(
             model=model,

View file

@@ -1,24 +1,30 @@
-# #### What this tests ####
-# # This tests if logging to the litedebugger integration actually works
-# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
-# import sys, os
-# import traceback
-# import pytest
-# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
-# litellm.set_verbose = True
-# litellm.email = "krrish@berri.ai"
-# user_message = "Hello, how are you?"
-# messages = [{ "content": user_message,"role": "user"}]
-# #openai call
-# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+#### What this tests ####
+# This tests if logging to the litedebugger integration actually works
+# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
+import sys, os
+import traceback
+import pytest
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+litellm.set_verbose = True
+litellm.use_client = True
+user_message = "Hello, how are you?"
+messages = [{ "content": user_message,"role": "user"}]
+# Test 1: On completion call
+response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 # print(f"response: {response}")
-# #bad request call
-# # response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
+# # Test 2: On embedding call
+# response = embedding(model="text-embedding-ada-002", input=["sample text"])
+# print(f"response: {response}")
+# # Test 3: On streaming completion call
+response = completion(model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}], stream=True)
+print(f"response: {response}")

View file

@@ -141,27 +141,41 @@ def install_and_import(package: str):
 ####### LOGGING ###################
+from enum import Enum
+class CallTypes(Enum):
+    embedding = 'embedding'
+    completion = 'completion'
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
 class Logging:
     global supabaseClient, liteDebuggerClient
-    def __init__(self, model, messages, optional_params, litellm_params):
+    def __init__(self, model, messages, stream, call_type, litellm_call_id):
+        if call_type not in [item.value for item in CallTypes]:
+            allowed_values = ", ".join([item.value for item in CallTypes])
+            raise ValueError(f"Invalid call_type {call_type}. Allowed values: {allowed_values}")
         self.model = model
         self.messages = messages
+        self.stream = stream
+        self.call_type = call_type
+        self.litellm_call_id = litellm_call_id
+    def update_environment_variables(self, optional_params, litellm_params):
         self.optional_params = optional_params
         self.litellm_params = litellm_params
         self.logger_fn = litellm_params["logger_fn"]
         print_verbose(f"self.optional_params: {self.optional_params}")
         self.model_call_details = {
-            "model": model,
-            "messages": messages,
+            "model": self.model,
+            "messages": self.messages,
             "optional_params": self.optional_params,
             "litellm_params": self.litellm_params,
         }
     def pre_call(self, input, api_key, model=None, additional_args={}):
         try:
-            print_verbose(f"logging pre call for model: {self.model}")
+            print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}")
             self.model_call_details["input"] = input
             self.model_call_details["api_key"] = api_key
             self.model_call_details["additional_args"] = additional_args
@@ -215,6 +229,7 @@ class Logging:
                             litellm_params=self.model_call_details["litellm_params"],
                             optional_params=self.model_call_details["optional_params"],
                             print_verbose=print_verbose,
+                            call_type=self.call_type,
                         )
                 except Exception as e:
                     print_verbose(
@@ -235,7 +250,7 @@ class Logging:
             if capture_exception: # log this error to sentry for debugging
                 capture_exception(e)
-    def post_call(self, input, api_key, original_response, additional_args={}):
+    def post_call(self, original_response, input=None, api_key=None, additional_args={}):
         # Do something here
         try:
             self.model_call_details["input"] = input
@@ -262,13 +277,13 @@ class Logging:
                 try:
                     if callback == "lite_debugger":
                         print_verbose("reaches litedebugger for post-call logging!")
-                        model = self.model_call_details["model"]
-                        messages = self.model_call_details["input"]
                         print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
                         liteDebuggerClient.post_call_log_event(
                             original_response=original_response,
                             litellm_call_id=self.litellm_params["litellm_call_id"],
                             print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
                         )
                 except:
                     print_verbose(
@@ -285,7 +300,72 @@ class Logging:
             )
         pass
-    # Add more methods as needed
+    def success_handler(self, result, start_time, end_time):
+        try:
+            for callback in litellm.success_callback:
+                try:
+                    if callback == "lite_debugger":
+                        print_verbose("reaches lite_debugger for logging!")
+                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                        print_verbose(f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}")
+                        liteDebuggerClient.log_event(
+                            end_user=litellm._thread_context.user,
+                            response_obj=result,
+                            start_time=start_time,
+                            end_time=end_time,
+                            litellm_call_id=self.litellm_call_id,
+                            print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
+                        )
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}"
+                    )
+                    print_verbose(
+                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+                    )
+                    if capture_exception: # log this error to sentry for debugging
+                        capture_exception(e)
+        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
+            )
+            pass
+    def failure_handler(self, exception, traceback_exception, start_time, end_time):
+        try:
+            for callback in litellm.failure_callback:
+                if callback == "lite_debugger":
+                    print_verbose("reaches lite_debugger for logging!")
+                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                    result = {
+                        "model": self.model,
+                        "created": time.time(),
+                        "error": traceback_exception,
+                        "usage": {
+                            "prompt_tokens": prompt_token_calculator(
+                                self.model, messages=self.messages
+                            ),
+                            "completion_tokens": 0,
+                        },
+                    }
+                    liteDebuggerClient.log_event(
+                        model=self.model,
+                        messages=self.messages,
+                        end_user=litellm._thread_context.user,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        litellm_call_id=self.litellm_call_id,
+                        print_verbose=print_verbose,
+                        call_type = self.call_type,
+                        stream = self.stream
+                    )
+            pass
+        except:
+            pass
 def exception_logging(
@@ -327,7 +407,7 @@ def client(original_function):
         *args, **kwargs
     ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
         try:
-            global callback_list, add_breadcrumb, user_logger_fn
+            global callback_list, add_breadcrumb, user_logger_fn, Logging
             if (
                 litellm.email is not None
                 or os.getenv("LITELLM_EMAIL", None) is not None
@@ -369,12 +449,22 @@ def client(original_function):
                 )
             if "logger_fn" in kwargs:
                 user_logger_fn = kwargs["logger_fn"]
-            # LOG SUCCESS
+            # CRASH REPORTING TELEMETRY
             crash_reporting(*args, **kwargs)
+            # INIT LOGGER - for user-specified integrations
+            model = args[0] if len(args) > 1 else kwargs["model"]
+            call_type = original_function.__name__
+            if call_type == CallTypes.completion.value:
+                messages = args[1] if len(args) > 2 else kwargs["messages"]
+            elif call_type == CallTypes.embedding.value:
+                messages = args[1] if len(args) > 2 else kwargs["input"]
+            stream = True if "stream" in kwargs and kwargs["stream"] == True else False
+            logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], call_type=call_type)
+            return logging_obj
         except: # DO NOT BLOCK running the function because of this
             print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
             pass
     def crash_reporting(*args, **kwargs):
         if litellm.telemetry:
             try:
@@ -397,10 +487,11 @@ def client(original_function):
     def wrapper(*args, **kwargs):
         start_time = None
         result = None
+        litellm_call_id = str(uuid.uuid4())
+        kwargs["litellm_call_id"] = litellm_call_id
+        logging_obj = function_setup(*args, **kwargs)
+        kwargs["litellm_logging_obj"] = logging_obj
         try:
-            function_setup(*args, **kwargs)
-            litellm_call_id = str(uuid.uuid4())
-            kwargs["litellm_call_id"] = litellm_call_id
             start_time = datetime.datetime.now()
             # [OPTIONAL] CHECK CACHE
             # remove this after deprecating litellm.caching
@@ -415,10 +506,13 @@ def client(original_function):
             # MODEL CALL
             result = original_function(*args, **kwargs)
+            end_time = datetime.datetime.now()
+            # LOG SUCCESS
+            logging_obj.success_handler(result, start_time, end_time)
             if "stream" in kwargs and kwargs["stream"] == True:
                 # TODO: Add to cache for streaming
                 return result
-            end_time = datetime.datetime.now()
             # [OPTIONAL] ADD TO CACHE
             if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                 litellm.cache.add_cache(result, *args, **kwargs)
@@ -433,6 +527,7 @@ def client(original_function):
             traceback_exception = traceback.format_exc()
             crash_reporting(*args, **kwargs, exception=traceback_exception)
             end_time = datetime.datetime.now()
+            logging_obj.failure_handler(e, traceback_exception, start_time, end_time)
             my_thread = threading.Thread(
                 target=handle_failure,
                 args=(e, traceback_exception, start_time, end_time, args, kwargs),
@@ -917,44 +1012,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                     litellm_call_id=kwargs["litellm_call_id"],
                     print_verbose=print_verbose,
                 )
-            elif callback == "lite_debugger":
-                print_verbose("reaches lite_debugger for logging!")
-                print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                model = args[0] if len(args) > 0 else kwargs["model"]
-                messages = (
-                    args[1]
-                    if len(args) > 1
-                    else kwargs.get(
-                        "messages",
-                        [
-                            {
-                                "role": "user",
-                                "content": " ".join(kwargs.get("input", "")),
-                            }
-                        ],
-                    )
-                )
-                result = {
-                    "model": model,
-                    "created": time.time(),
-                    "error": traceback_exception,
-                    "usage": {
-                        "prompt_tokens": prompt_token_calculator(
-                            model, messages=messages
-                        ),
-                        "completion_tokens": 0,
-                    },
-                }
-                liteDebuggerClient.log_event(
-                    model=model,
-                    messages=messages,
-                    end_user=litellm._thread_context.user,
-                    response_obj=result,
-                    start_time=start_time,
-                    end_time=end_time,
-                    litellm_call_id=kwargs["litellm_call_id"],
-                    print_verbose=print_verbose,
-                )
     except:
         print_verbose(
             f"Error Occurred while logging failure: {traceback.format_exc()}"
@@ -1085,32 +1142,6 @@ def handle_success(args, kwargs, result, start_time, end_time):
                     litellm_call_id=kwargs["litellm_call_id"],
                     print_verbose=print_verbose,
                 )
-            elif callback == "lite_debugger":
-                print_verbose("reaches lite_debugger for logging!")
-                print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                messages = (
-                    args[1]
-                    if len(args) > 1
-                    else kwargs.get(
-                        "messages",
-                        [
-                            {
-                                "role": "user",
-                                "content": " ".join(kwargs.get("input", "")),
-                            }
-                        ],
-                    )
-                )
-                liteDebuggerClient.log_event(
-                    model=model,
-                    messages=messages,
-                    end_user=litellm._thread_context.user,
-                    response_obj=result,
-                    start_time=start_time,
-                    end_time=end_time,
-                    litellm_call_id=kwargs["litellm_call_id"],
-                    print_verbose=print_verbose,
-                )
         except Exception as e:
             # LOGGING
             exception_logging(logger_fn=user_logger_fn, exception=e)
@@ -1486,9 +1517,10 @@ def get_secret(secret_name):
 # wraps the completion stream to return the correct format for the model
 # replicate/anthropic/cohere
 class CustomStreamWrapper:
-    def __init__(self, completion_stream, model, custom_llm_provider=None):
+    def __init__(self, completion_stream, model, custom_llm_provider=None, logging_obj=None):
         self.model = model
         self.custom_llm_provider = custom_llm_provider
+        self.logging_obj = logging_obj
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
@@ -1497,6 +1529,10 @@ class CustomStreamWrapper:
     def __iter__(self):
         return self
+    def logging(self, text):
+        if self.logging_obj:
+            self.logging_obj.post_call(text)
     def handle_anthropic_chunk(self, chunk):
         str_line = chunk.decode("utf-8") # Convert bytes to string
@@ -1586,6 +1622,8 @@ class CustomStreamWrapper:
         elif self.model in litellm.open_ai_text_completion_models:
             chunk = next(self.completion_stream)
             completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+            # LOGGING
+            self.logging_obj(completion_obj["content"])
         # return this for all models
         return {"choices": [{"delta": completion_obj}]}

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.495"
+version = "0.1.496"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"