making logging a class - adding input-callbacks

Krrish Dholakia 2023-08-19 20:03:31 -07:00
parent 9ac577ed6f
commit d792be89de
12 changed files with 237 additions and 315 deletions
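
At a glance: the commit replaces the module-level `logging(...)` helper with a per-request `Logging` object that `completion()` constructs and hands to the provider classes and integrations, and it adds `litellm.input_callback` so an integration (Supabase here) can record that a call was attempted before any response arrives. The snippet below is a simplified, self-contained sketch of that lifecycle, not the actual class from the diff; callback dispatch and error handling are omitted.

import uuid

class Logging:
    # Simplified stand-in for the Logging class introduced in the utils.py diff below.
    def __init__(self, model, messages, optional_params, litellm_params):
        self.logger_fn = litellm_params.get("logger_fn")
        self.model_call_details = {
            "model": model,
            "messages": messages,
            "optional_params": optional_params,
            "litellm_params": litellm_params,
        }

    def pre_call(self, input, api_key, additional_args={}):
        # Fired before the provider request (this is where input_callback integrations run).
        self.model_call_details.update(
            {"input": input, "api_key": api_key, "additional_args": additional_args}
        )
        if callable(self.logger_fn):
            self.logger_fn(self.model_call_details)

    def post_call(self, input, api_key, original_response, additional_args={}):
        # Fired after the provider responds (or raises), with the raw response attached.
        self.model_call_details.update(
            {
                "input": input,
                "api_key": api_key,
                "original_response": original_response,
                "additional_args": additional_args,
            }
        )
        if callable(self.logger_fn):
            self.logger_fn(self.model_call_details)

# Roughly how completion() wires it up for one request:
litellm_params = {"litellm_call_id": str(uuid.uuid4()), "logger_fn": print}
logging_obj = Logging("gpt-3.5-turbo", [{"role": "user", "content": "hi"}], {}, litellm_params)
logging_obj.pre_call(input="hi", api_key="sk-...")
fake_response = {"choices": [{"message": {"content": "hello"}}]}  # stand-in for the provider call
logging_obj.post_call(input="hi", api_key="sk-...", original_response=fake_response)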

View file

@@ -22,11 +22,13 @@ create table
     messages json null default '{}'::json,
     response json null default '{}'::json,
     end_user text null default ''::text,
+    status text null default ''::text,
     error json null default '{}'::json,
     response_time real null default '0'::real,
     total_cost real null,
     additional_details json null default '{}'::json,
-    constraint request_logs_pkey primary key (id)
+    litellm_call_id text unique,
+    primary key (id)
 ) tablespace pg_default;
 ```

View file

@@ -1,6 +1,6 @@
 import threading
 from typing import Callable, List, Optional
+input_callback: List[str] = []
 success_callback: List[str] = []
 failure_callback: List[str] = []
 set_verbose = False
@@ -216,7 +216,6 @@ from .timeout import timeout
 from .testing import *
 from .utils import (
     client,
-    logging,
     exception_type,
     get_optional_params,
     modify_integration,
@@ -224,6 +223,7 @@ from .utils import (
     cost_per_token,
     completion_cost,
     get_litellm_params,
+    Logging
 )
 from .main import * # type: ignore
 from .integrations import *

View file

@@ -144,6 +144,28 @@ class Supabase:
             )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
 
+    def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
+        try:
+            print_verbose(
+                f"Supabase Logging - Enters input logging function for model {model}"
+            )
+            supabase_data_obj = {
+                "model": model,
+                "messages": messages,
+                "end_user": end_user,
+                "status": "initiated",
+                "litellm_call_id": litellm_call_id
+            }
+            data, count = (
+                self.supabase_client.table(self.supabase_table_name)
+                .insert(supabase_data_obj)
+                .execute()
+            )
+            print(f"data: {data}")
+            pass
+        except:
+            pass
+
     def log_event(
         self,
         model,
@ -152,6 +174,7 @@ class Supabase:
         response_obj,
         start_time,
         end_time,
+        litellm_call_id,
         print_verbose,
     ):
         try:
@ -176,16 +199,20 @@ class Supabase:
"messages": messages, "messages": messages,
"response": response_obj["choices"][0]["message"]["content"], "response": response_obj["choices"][0]["message"]["content"],
"end_user": end_user, "end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "success"
} }
print_verbose( print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}" f"Supabase Logging - final data object: {supabase_data_obj}"
) )
data, count = ( data, count = (
self.supabase_client.table(self.supabase_table_name) self.supabase_client.table(self.supabase_table_name)
.insert(supabase_data_obj) .upsert(supabase_data_obj)
.execute() .execute()
) )
elif "error" in response_obj: elif "error" in response_obj:
if "Unable to map your input to a model." in response_obj["error"]:
total_cost = 0
supabase_data_obj = { supabase_data_obj = {
"response_time": response_time, "response_time": response_time,
"model": response_obj["model"], "model": response_obj["model"],
@ -193,13 +220,15 @@ class Supabase:
"messages": messages, "messages": messages,
"error": response_obj["error"], "error": response_obj["error"],
"end_user": end_user, "end_user": end_user,
"litellm_call_id": litellm_call_id,
"status": "failure"
} }
print_verbose( print_verbose(
f"Supabase Logging - final data object: {supabase_data_obj}" f"Supabase Logging - final data object: {supabase_data_obj}"
) )
data, count = ( data, count = (
self.supabase_client.table(self.supabase_table_name) self.supabase_client.table(self.supabase_table_name)
.insert(supabase_data_obj) .upsert(supabase_data_obj)
.execute() .execute()
) )
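
The net effect of the two Supabase methods above is a small per-request status lifecycle keyed by the new `litellm_call_id` and `status` columns from the schema change: `input_log_event` writes a row with `status='initiated'` as soon as the call starts, and `log_event` later records `success` or `failure` for the same call id via `upsert` instead of `insert`. A rough standalone sketch with supabase-py (table name, env vars, and field values here are illustrative assumptions, not taken from the diff):

import os
from supabase import create_client  # pip install supabase

supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
table = "request_logs"  # assumed table name for illustration
call_id = "3f9c7c2e-example"  # generated once per request by the @client wrapper

# input_log_event: mark the attempt as started before the provider is called
supabase.table(table).insert({
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
    "end_user": "user-123",
    "status": "initiated",
    "litellm_call_id": call_id,
}).execute()

# log_event: record the outcome for the same call id once the response (or error) is in;
# the unique litellm_call_id column added in the schema above is what makes the row addressable
supabase.table(table).upsert({
    "litellm_call_id": call_id,
    "status": "success",
    "response": "I'm doing well, thank you!",
    "response_time": 1.4,
}).execute()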

View file

@@ -1,7 +1,6 @@
 import os, json
 from enum import Enum
 import requests
-from litellm import logging
 import time
 from typing import Callable
 from litellm.utils import ModelResponse
@@ -22,11 +21,12 @@ class AnthropicError(Exception):
 
 class AnthropicLLM:
-    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+    def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
         self.encoding = encoding
         self.default_max_tokens_to_sample = default_max_tokens_to_sample
         self.completion_url = "https://api.anthropic.com/v1/complete"
         self.api_key = api_key
+        self.logging_obj = logging_obj
         self.validate_environment(api_key=api_key)
 
     def validate_environment(
@ -84,6 +84,7 @@ class AnthropicLLM:
         }
 
         ## LOGGING
+        self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
         logging(
             model=model,
             input=prompt,
@ -101,16 +102,7 @@ class AnthropicLLM:
             return response.iter_lines()
         else:
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                additional_args={
-                    "litellm_params": litellm_params,
-                    "optional_params": optional_params,
-                    "original_response": response.text,
-                },
-                logger_fn=logger_fn,
-            )
+            self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
             print_verbose(f"raw model_response: {response.text}")
             ## RESPONSE OBJECT
             completion_response = response.json()

View file

@@ -2,7 +2,6 @@
 import os, json
 from enum import Enum
 import requests
-from litellm import logging
 import time
 from typing import Callable
 from litellm.utils import ModelResponse
@@ -19,8 +18,9 @@ class HuggingfaceError(Exception):
 
 class HuggingfaceRestAPILLM:
-    def __init__(self, encoding, api_key=None) -> None:
+    def __init__(self, encoding, logging_obj, api_key=None) -> None:
         self.encoding = encoding
+        self.logging_obj = logging_obj
         self.validate_environment(api_key=api_key)
 
     def validate_environment(
@ -74,18 +74,10 @@ class HuggingfaceRestAPILLM:
                 optional_params["max_new_tokens"] = value
         data = {
             "inputs": prompt,
-            # "parameters": optional_params
+            "parameters": optional_params
         }
         ## LOGGING
-        logging(
-            model=model,
-            input=prompt,
-            additional_args={
-                "litellm_params": litellm_params,
-                "optional_params": optional_params,
-            },
-            logger_fn=logger_fn,
-        )
+        logging.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
         ## COMPLETION CALL
         response = requests.post(
             completion_url, headers=self.headers, data=json.dumps(data)
@ -94,17 +86,7 @@ class HuggingfaceRestAPILLM:
             return response.iter_lines()
         else:
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                additional_args={
-                    "litellm_params": litellm_params,
-                    "optional_params": optional_params,
-                    "original_response": response.text,
-                },
-                logger_fn=logger_fn,
-            )
-            print_verbose(f"raw model_response: {response.text}")
+            logging.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
             ## RESPONSE OBJECT
             completion_response = response.json()
             print_verbose(f"response: {completion_response}")

View file

@@ -6,11 +6,11 @@ from copy import deepcopy
 import litellm
 from litellm import ( # type: ignore
     client,
-    logging,
     exception_type,
     timeout,
     get_optional_params,
     get_litellm_params,
+    Logging
 )
 from litellm.utils import (
     get_secret,
@@ -85,6 +85,7 @@ def completion(
     azure=False,
     custom_llm_provider=None,
     custom_api_base=None,
+    litellm_call_id=None,
     # model specific optional params
     # used by text-bison only
     top_k=40,
@@ -129,8 +130,9 @@ def completion(
             verbose=verbose,
             custom_llm_provider=custom_llm_provider,
             custom_api_base=custom_api_base,
+            litellm_call_id=litellm_call_id
         )
+        logging = Logging(model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params)
         if custom_llm_provider == "azure":
             # azure configs
             openai.api_type = "azure"
@@ -144,16 +146,14 @@ def completion(
                 if litellm.api_version is not None
                 else get_secret("AZURE_API_VERSION")
             )
+            if not api_key and litellm.azure_key:
+                api_key = litellm.azure_key
+            elif not api_key and get_secret("AZURE_API_KEY"):
+                api_key = get_secret("AZURE_API_KEY")
             # set key
-            openai.api_key = api_key or litellm.azure_key or get_secret("AZURE_API_KEY")
+            openai.api_key = api_key
             ## LOGGING
-            logging(
-                model=model,
-                input=messages,
-                additional_args=optional_params,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
             ## COMPLETION CALL
             if litellm.headers:
                 response = openai.ChatCompletion.create(
@@ -166,6 +166,8 @@ def completion(
                 response = openai.ChatCompletion.create(
                     model=model, messages=messages, **optional_params
                 )
+            ## LOGGING
+            logging.post_call(input=messages, api_key=openai.api_key, original_response=response, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
         elif (
             model in litellm.open_ai_chat_completion_models
             or custom_llm_provider == "custom_openai"
@@ -182,18 +184,15 @@ def completion(
             if litellm.organization:
                 openai.organization = litellm.organization
             # set API KEY
-            openai.api_key = (
-                api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
-            )
+            if not api_key and litellm.openai_key:
+                api_key = litellm.openai_key
+            elif not api_key and get_secret("AZURE_API_KEY"):
+                api_key = get_secret("OPENAI_API_KEY")
+            openai.api_key = api_key
             ## LOGGING
-            logging(
-                model=model,
-                input=messages,
-                additional_args=args,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=messages, api_key=api_key, additional_args={"headers": litellm.headers, "api_base": api_base})
             ## COMPLETION CALL
             if litellm.headers:
                 response = openai.ChatCompletion.create(
@@ -206,6 +205,8 @@ def completion(
                 response = openai.ChatCompletion.create(
                     model=model, messages=messages, **optional_params
                 )
+            ## LOGGING
+            logging.post_call(input=messages, api_key=api_key, original_response=response, additional_args={"headers": litellm.headers})
         elif model in litellm.open_ai_text_completion_models:
             openai.api_type = "openai"
             openai.api_base = (
@@ -214,20 +215,19 @@ def completion(
                 else "https://api.openai.com/v1"
             )
             openai.api_version = None
-            openai.api_key = (
-                api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
-            )
+            # set API KEY
+            if not api_key and litellm.openai_key:
+                api_key = litellm.openai_key
+            elif not api_key and get_secret("AZURE_API_KEY"):
+                api_key = get_secret("OPENAI_API_KEY")
+            openai.api_key = api_key
             if litellm.organization:
                 openai.organization = litellm.organization
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                additional_args=optional_params,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=api_key, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
             ## COMPLETION CALL
             if litellm.headers:
                 response = openai.Completion.create(
@@ -237,19 +237,10 @@ def completion(
                 )
             else:
                 response = openai.Completion.create(model=model, prompt=prompt)
-            completion_response = response["choices"][0]["text"]
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=api_key, original_response=response, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
             ## RESPONSE OBJECT
+            completion_response = response["choices"][0]["text"]
             model_response["choices"][0]["message"]["content"] = completion_response
             model_response["created"] = response["created"]
             model_response["model"] = model
@@ -278,13 +269,7 @@ def completion(
                 input["max_length"] = max_tokens # for t5 models
                 input["max_new_tokens"] = max_tokens # for llama2 models
             ## LOGGING
-            logging(
-                model=model,
-                input=input,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={"max_tokens": max_tokens},
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=replicate_key, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
             ## COMPLETION CALL
             output = replicate.run(model, input=input)
             if "stream" in optional_params and optional_params["stream"] == True:
@@ -297,16 +282,8 @@ def completion(
                 response += item
             completion_response = response
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=replicate_key, original_response=completion_response, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
+            ## USAGE
             prompt_tokens = len(encoding.encode(prompt))
             completion_tokens = len(encoding.encode(completion_response))
             ## RESPONSE OBJECT
@@ -327,6 +304,7 @@ def completion(
                 encoding=encoding,
                 default_max_tokens_to_sample=litellm.max_tokens,
                 api_key=anthropic_key,
+                logging_obj = logging # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements
             )
             model_response = anthropic_client.completion(
                 model=model,
@@ -362,13 +340,7 @@ def completion(
                 "OR_API_KEY"
             )
             ## LOGGING
-            logging(
-                model=model,
-                input=messages,
-                additional_args=optional_params,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=messages, api_key=openai.api_key)
             ## COMPLETION CALL
             if litellm.headers:
                 response = openai.ChatCompletion.create(
@@ -395,6 +367,8 @@ def completion(
                     },
                     **optional_params,
                 )
+            ## LOGGING
+            logging.post_call(input=messages, api_key=openai.api_key, original_response=response)
         elif model in litellm.cohere_models:
             # import cohere/if it fails then pip install cohere
             install_and_import("cohere")
@@ -409,31 +383,17 @@ def completion(
             co = cohere.Client(cohere_key)
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=cohere_key)
             ## COMPLETION CALL
             response = co.generate(model=model, prompt=prompt, **optional_params)
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(response, model)
                 return response
-            completion_response = response[0].text
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=cohere_key, original_response=response)
+            ## USAGE
+            completion_response = response[0].text
             prompt_tokens = len(encoding.encode(prompt))
             completion_tokens = len(encoding.encode(completion_response))
             ## RESPONSE OBJECT
@@ -457,7 +417,7 @@ def completion(
                 or os.environ.get("HUGGINGFACE_API_KEY")
             )
             huggingface_client = HuggingfaceRestAPILLM(
-                encoding=encoding, api_key=huggingface_key
+                encoding=encoding, api_key=huggingface_key, logging_obj=logging
             )
             model_response = huggingface_client.completion(
                 model=model,
@@ -492,12 +452,7 @@ def completion(
             ) # TODO: Add chat support for together AI
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=TOGETHER_AI_TOKEN)
             if stream == True:
                 return together_ai_completion_streaming(
                     {
@@ -519,17 +474,7 @@ def completion(
                 headers=headers,
             )
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": res.text,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text)
             # make this safe for reading, if output does not exist raise an error
             json_response = res.json()
             if "output" not in json_response:
@@ -562,16 +507,7 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "litellm_params": litellm_params,
-                    "optional_params": optional_params,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=None)
 
             chat_model = ChatModel.from_pretrained(model)
@@ -579,16 +515,7 @@ def completion(
             completion_response = chat.send_message(prompt, **optional_params)
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=None, original_response=completion_response)
 
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = completion_response
@@ -607,27 +534,13 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=None)
             vertex_model = TextGenerationModel.from_pretrained(model)
             completion_response = vertex_model.predict(prompt, **optional_params)
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=None, original_response=completion_response)
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = completion_response
             model_response["created"] = time.time()
@@ -641,12 +554,7 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=ai21.api_key)
 
             ai21_response = ai21.Completion.execute(
                 model=model,
@@ -655,16 +563,7 @@ def completion(
             completion_response = ai21_response["completions"][0]["data"]["text"]
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=ai21.api_key, original_response=completion_response)
 
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = completion_response
@@ -678,7 +577,8 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
+            logging.pre_call(input=prompt, api_key=None, additional_args={"endpoint": endpoint})
             generator = get_ollama_response_stream(endpoint, model, prompt)
             # assume all responses are streamed
             return generator
@@ -693,12 +593,7 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=base_ten_key)
 
             base_ten__model = baseten.deployed_model_version_id(model)
@@ -708,16 +603,8 @@ def completion(
             if type(completion_response) == dict:
                 completion_response = completion_response["generated_text"]
 
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": completion_response,
-                },
-                logger_fn=logger_fn,
-            )
+            ## LOGGING
+            logging.post_call(input=prompt, api_key=base_ten_key, original_response=completion_response)
 
             ## RESPONSE OBJECT
             model_response["choices"][0]["message"]["content"] = completion_response
@@ -734,26 +621,14 @@ def completion(
             prompt = " ".join([message["content"] for message in messages])
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
+            logging.pre_call(input=prompt, api_key=None, additional_args={"url": url, "max_new_tokens": 100})
             response = requests.post(
                 url, data={"inputs": prompt, "max_new_tokens": 100, "model": model}
             )
             ## LOGGING
-            logging(
-                model=model,
-                input=prompt,
-                custom_llm_provider=custom_llm_provider,
-                additional_args={
-                    "max_tokens": max_tokens,
-                    "original_response": response,
-                },
-                logger_fn=logger_fn,
-            )
+            logging.post_call(input=prompt, api_key=None, original_response=response.text, additional_args={"url": url, "max_new_tokens": 100})
             completion_response = response.json()["outputs"]
 
             # RESPONSE OBJECT
@@ -762,13 +637,6 @@ def completion(
             model_response["model"] = model
             response = model_response
         else:
-            ## LOGGING
-            logging(
-                model=model,
-                input=messages,
-                custom_llm_provider=custom_llm_provider,
-                logger_fn=logger_fn,
-            )
             args = locals()
             raise ValueError(
                 f"Unable to map your input to a model. Check your input - {args}"
@@ -776,14 +644,7 @@ def completion(
         return response
     except Exception as e:
         ## LOGGING
-        logging(
-            model=model,
-            input=messages,
-            custom_llm_provider=custom_llm_provider,
-            additional_args={"max_tokens": max_tokens},
-            logger_fn=logger_fn,
-            exception=e,
-        )
+        logging.post_call(input=messages, api_key=api_key, original_response=e)
         ## Map to OpenAI Exception
         raise exception_type(
             model=model, custom_llm_provider=custom_llm_provider, original_exception=e
@@ -825,7 +686,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
         openai.api_version = get_secret("AZURE_API_VERSION")
         openai.api_key = get_secret("AZURE_API_KEY")
         ## LOGGING
-        logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
+        logging.pre_call(model=model, input=input, azure=azure, logger_fn=logger_fn)
         ## EMBEDDING CALL
         response = openai.Embedding.create(input=input, engine=model)
         print_verbose(f"response_value: {str(response)[:50]}")

View file

@@ -1,27 +1,28 @@
-# #### What this tests ####
-# # This tests if logging to the helicone integration actually works
-# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
-# import sys, os
-# import traceback
-# import pytest
-# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
-# import litellm
-# from litellm import embedding, completion
-# litellm.success_callback = ["supabase"]
-# litellm.failure_callback = ["supabase"]
-# litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
-# litellm.set_verbose = True
-# user_message = "Hello, how are you?"
-# messages = [{ "content": user_message,"role": "user"}]
-# #openai call
-# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
-# #bad request call
-# response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
+#### What this tests ####
+# This tests if logging to the helicone integration actually works
+# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
+import sys, os
+import traceback
+import pytest
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+litellm.input_callback = ["supabase"]
+litellm.success_callback = ["supabase"]
+litellm.failure_callback = ["supabase"]
+litellm.modify_integration("supabase",{"table_name": "test_table"})
+litellm.set_verbose = True
+user_message = "Hello, how are you?"
+messages = [{ "content": user_message,"role": "user"}]
+#openai call
+response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+#bad request call
+response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@@ -135,48 +135,105 @@ def install_and_import(package: str):
 ####### LOGGING ###################
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(
-    model=None,
-    input=None,
-    custom_llm_provider=None,
-    azure=False,
+class Logging:
+    def __init__(self, model, messages, optional_params, litellm_params):
+        self.model = model
+        self.messages = messages
+        self.optional_params = optional_params
+        self.litellm_params = litellm_params
+        self.logger_fn = litellm_params["logger_fn"]
+        self.model_call_details = {
+            "model": model,
+            "messages": messages,
+            "optional_params": self.optional_params,
+            "litellm_params": self.litellm_params,
+        }
+
+    def pre_call(self, input, api_key, additional_args={}):
+        try:
+            print(f"logging pre call for model: {self.model}")
+            self.model_call_details["input"] = input
+            self.model_call_details["api_key"] = api_key
+            self.model_call_details["additional_args"] = additional_args
+            ## User Logging -> if you pass in a custom logging function
+            print_verbose(
+                f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}"
+            )
+            if self.logger_fn and callable(self.logger_fn):
+                try:
+                    self.logger_fn(
+                        self.model_call_details
+                    ) # Expectation: any logger function passed in by the user should accept a dict object
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
+                    )
+            ## Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
+            for callback in litellm.input_callback:
+                try:
+                    if callback == "supabase":
+                        print_verbose("reaches supabase for logging!")
+                        model = self.model
+                        messages = self.messages
+                        print(f"litellm._thread_context: {litellm._thread_context}")
+                        supabaseClient.input_log_event(
+                            model=model,
+                            messages=messages,
+                            end_user=litellm._thread_context.user,
+                            litellm_call_id=self.litellm_params["litellm_call_id"],
+                            print_verbose=print_verbose,
+                        )
+                    pass
+                except:
+                    pass
+        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
+            )
+            pass
+
+    def post_call(self, input, api_key, original_response, additional_args={}):
+        # Do something here
+        try:
+            self.model_call_details["input"] = input
+            self.model_call_details["api_key"] = api_key
+            self.model_call_details["original_response"] = original_response
+            self.model_call_details["additional_args"] = additional_args
+            ## User Logging -> if you pass in a custom logging function
+            print_verbose(
+                f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}"
+            )
+            if self.logger_fn and callable(self.logger_fn):
+                try:
+                    self.logger_fn(
+                        self.model_call_details
+                    ) # Expectation: any logger function passed in by the user should accept a dict object
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
+                    )
+        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
+            )
+            pass
+
+    # Add more methods as needed
+
+
+def exception_logging(
     additional_args={},
     logger_fn=None,
     exception=None,
 ):
     try:
         model_call_details = {}
-        if model:
-            model_call_details["model"] = model
-        if azure:
-            model_call_details["azure"] = azure
-        if custom_llm_provider:
-            model_call_details["custom_llm_provider"] = custom_llm_provider
         if exception:
             model_call_details["exception"] = exception
-        if input:
-            model_call_details["input"] = input
-        if len(additional_args):
         model_call_details["additional_args"] = additional_args
-        # log additional call details -> api key, etc.
-        if model:
-            if (
-                azure == True
-                or model in litellm.open_ai_chat_completion_models
-                or model in litellm.open_ai_chat_completion_models
-                or model in litellm.open_ai_embedding_models
-            ):
-                model_call_details["api_type"] = openai.api_type
-                model_call_details["api_base"] = openai.api_base
-                model_call_details["api_version"] = openai.api_version
-                model_call_details["api_key"] = openai.api_key
-            elif "replicate" in model:
-                model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
-            elif model in litellm.anthropic_models:
-                model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
-            elif model in litellm.cohere_models:
-                model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
         ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
         print_verbose(
             f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}"
@@ -206,10 +263,10 @@ def client(original_function):
         try:
             global callback_list, add_breadcrumb, user_logger_fn
             if (
-                len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
+                len(litellm.input_callback) > 0 or len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
             ) and len(callback_list) == 0:
                 callback_list = list(
-                    set(litellm.success_callback + litellm.failure_callback)
+                    set(litellm.input_callback + litellm.success_callback + litellm.failure_callback)
                 )
                 set_callbacks(
                     callback_list=callback_list,
@@ -299,13 +356,16 @@ def client(original_function):
         result = None
         try:
             function_setup(*args, **kwargs)
-            ## MODEL CALL
+            litellm_call_id = str(uuid.uuid4())
+            kwargs["litellm_call_id"] = litellm_call_id
+            ## [OPTIONAL] CHECK CACHE
             start_time = datetime.datetime.now()
             if (litellm.caching or litellm.caching_with_models) and (
                 cached_result := check_cache(*args, **kwargs)
             ) is not None:
                 result = cached_result
             else:
+                ## MODEL CALL
                 result = original_function(*args, **kwargs)
             end_time = datetime.datetime.now()
             ## Add response to CACHE
@@ -399,6 +459,7 @@ def get_litellm_params(
     together_ai=False,
     custom_llm_provider=None,
     custom_api_base=None,
+    litellm_call_id=None,
 ):
     litellm_params = {
         "return_async": return_async,
@@ -408,6 +469,7 @@ def get_litellm_params(
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
         "custom_api_base": custom_api_base,
+        "litellm_call_id": litellm_call_id
     }
     return litellm_params
@@ -567,7 +629,8 @@ def set_callbacks(callback_list):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
     try:
         for callback in callback_list:
-            if callback == "sentry" or "SENTRY_API_URL" in os.environ:
+            print(f"callback: {callback}")
+            if callback == "sentry":
                 try:
                     import sentry_sdk
                 except ImportError:
@@ -623,6 +686,7 @@ def set_callbacks(callback_list):
             elif callback == "berrispend":
                 berrispendLogger = BerriSpendLogger()
             elif callback == "supabase":
+                print(f"instantiating supabase")
                 supabaseClient = Supabase()
     except Exception as e:
         raise e
@@ -743,7 +807,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
                         "completion_tokens": 0,
                     },
                 }
-                print(f"litellm._thread_context: {litellm._thread_context}")
                 supabaseClient.log_event(
                     model=model,
                     messages=messages,
@@ -751,9 +814,9 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
                    response_obj=result,
                    start_time=start_time,
                    end_time=end_time,
+                   litellm_call_id=kwargs["litellm_call_id"],
                    print_verbose=print_verbose,
                )
            except:
                print_verbose(
                    f"Error Occurred while logging failure: {traceback.format_exc()}"
@@ -769,7 +832,7 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
         pass
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, exception=e)
+        exception_logging(logger_fn=user_logger_fn, exception=e)
         pass
@@ -849,11 +912,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
                     response_obj=result,
                     start_time=start_time,
                     end_time=end_time,
+                    litellm_call_id=kwargs["litellm_call_id"],
                     print_verbose=print_verbose,
                 )
             except Exception as e:
                 ## LOGGING
-                logging(logger_fn=user_logger_fn, exception=e)
+                exception_logging(logger_fn=user_logger_fn, exception=e)
                 print_verbose(
                     f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
                 )
@@ -864,7 +928,7 @@ def handle_success(args, kwargs, result, start_time, end_time):
         pass
     except Exception as e:
         ## LOGGING
-        logging(logger_fn=user_logger_fn, exception=e)
+        exception_logging(logger_fn=user_logger_fn, exception=e)
         print_verbose(
             f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
         )
@@ -912,15 +976,6 @@ def exception_type(model, original_exception, custom_llm_provider):
                 exception_type = type(original_exception).__name__
             else:
                 exception_type = ""
-            logging(
-                model=model,
-                additional_args={
-                    "error_str": error_str,
-                    "exception_type": exception_type,
-                    "original_exception": original_exception,
-                },
-                logger_fn=user_logger_fn,
-            )
             if "claude" in model: # one of the anthropics
                 if hasattr(original_exception, "status_code"):
                     print_verbose(f"status_code: {original_exception.status_code}")
@@ -1030,7 +1085,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise original_exception
     except Exception as e:
         ## LOGGING
-        logging(
+        exception_logging(
             logger_fn=user_logger_fn,
             additional_args={
                 "exception_mapping_worked": exception_mapping_worked,