adding support for supabase integration

This commit is contained in:
Krrish Dholakia 2023-08-05 19:57:33 -07:00
parent 31ca0839e5
commit 75057c72d2
15 changed files with 259 additions and 23 deletions

View file

@ -0,0 +1,75 @@
# Supabase Tutorial
[Supabase](https://supabase.com/) is an open source Firebase alternative.
Start your project with a Postgres database, Authentication, instant APIs, Edge Functions, Realtime subscriptions, Storage, and Vector embeddings.
## Use Supabase to see total spend across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
liteLLM provides `success_callback` and `failure_callback`, making it easy to send data to a particular provider depending on the status of your responses.
In this case, we want to log requests to Supabase in both scenarios - when a request succeeds and when it fails.
### Create a supabase table
In your Supabase project, open the [Supabase SQL Editor](https://supabase.com/dashboard/projects) and create a new table with this configuration.
Note: You can change the table name. Just don't change the column names.
```sql
create table
public.request_logs (
id bigint generated by default as identity,
created_at timestamp with time zone null default now(),
model text null default ''::text,
messages json null default '{}'::json,
response json null default '{}'::json,
end_user text null default ''::text,
error json null default '{}'::json,
response_time real null default '0'::real,
total_cost real null,
additional_details json null default '{}'::json,
constraint request_logs_pkey primary key (id)
) tablespace pg_default;
```
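Once liteLLM starts logging requests (next step), total spend can be read straight out of this table. Here's a minimal sketch using the `supabase` Python client - the table name and env variable names are the defaults used in this tutorial:
```python
import os
from supabase import create_client

# connect with the same credentials the liteLLM callback uses
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

# fetch per-request costs and sum them client-side
rows = client.table("request_logs").select("total_cost").execute()
total_spend = sum(row["total_cost"] or 0 for row in rows.data)
print(f"Total LLM spend: ${total_spend:.4f}")
```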
### Use Callbacks
Use just 2 lines of code to instantly see costs and log your responses **across all providers** with Supabase:
```python
litellm.success_callback=["supabase"]
litellm.failure_callback=["supabase"]
```
Complete code
```python
import os
import litellm
from litellm import completion

## set env variables
os.environ["SUPABASE_URL"] = "your-supabase-url"
os.environ["SUPABASE_KEY"] = "your-supabase-key"
os.environ["OPENAI_API_KEY"] = "your-openai-key"

# set callbacks
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]

# openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

# bad call - logged to Supabase via the failure callback
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad call to test error logging"}])
```
### Additional Controls
**Different Table name**
If you modified your table name, here's how to pass the new name.
```python
litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
```
**Identify end-user**
Here's how to map your LLM call to an end-user:
```python
litellm.identify({"end_user": "krrish@berri.ai"})
```
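Putting it together - a minimal sketch (reusing the env setup from the complete-code example above) that tags a request with an end-user before making the call:
```python
import litellm
from litellm import completion

litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]

# attribute the next call(s) in this thread to an end-user
litellm.identify({"end_user": "krrish@berri.ai"})

# the request below is logged to Supabase and attributed to that end-user
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}])
```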

View file

@ -1,3 +1,4 @@
+import threading
 success_callback = []
 failure_callback = []
 set_verbose=False
@ -9,20 +10,16 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
-MAX_TOKENS = {
-    'gpt-3.5-turbo': 4000,
-    'gpt-3.5-turbo-0613': 4000,
-    'gpt-3.5-turbo-0301': 4000,
-    'gpt-3.5-turbo-16k': 16000,
-    'gpt-3.5-turbo-16k-0613': 16000,
-    'gpt-4': 8000,
-    'gpt-4-0613': 8000,
-    'gpt-4-32k': 32000,
-    'claude-instant-1': 100000,
-    'claude-2': 100000,
-    'command-nightly': 4096,
-    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
-}
+####### THREAD-SPECIFIC DATA ###################
+class MyLocal(threading.local):
+    def __init__(self):
+        self.user = "Hello World"
+
+_thread_context = MyLocal()
+def identify(event_details):
+    # Store user in thread local data
+    if "user" in event_details:
+        _thread_context.user = event_details["user"]
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
@ -71,6 +68,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params # Import all the symbols from main.py
+from .utils import client, logging, exception_type, get_optional_params, modify_integration
 from .main import * # Import all the symbols from main.py
 from .integrations import *

View file

@ -0,0 +1,104 @@
#### What this does ####
# On success + failure, log events to Supabase
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys

model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},  # $0.03 / 1K input, $0.06 / 1K output
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}

class Supabase:
    # Class variables or attributes
    supabase_table_name = "request_logs"

    def __init__(self):
        # Instance variables
        self.supabase_url = os.getenv("SUPABASE_URL")
        self.supabase_key = os.getenv("SUPABASE_KEY")
        try:
            import supabase
        except ImportError:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
            import supabase
        self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)

    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
        # else default to the average of the costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on run time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = (end_time - start_time).total_seconds()
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate the average input/output cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
        try:
            print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            response_time = (end_time - start_time).total_seconds()
            if "choices" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj['choices'][0]['message']['content'],
                    "end_user": end_user
                }
                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
            elif "error" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj['error'],
                    "end_user": end_user
                }
                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
        except:
            # traceback.print_exc()
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
            pass

View file

@ -1,6 +1,6 @@
#### What this tests ####
# This tests if logging to the helicone integration actually works
+# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
# import pytest

View file

@ -0,0 +1,27 @@
#### What this tests ####
# This tests if logging to the supabase integration actually works
# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]
litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
litellm.set_verbose = True
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
#bad request call
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@ -6,6 +6,10 @@ import datetime, time
from anthropic import Anthropic
import tiktoken
encoding = tiktoken.get_encoding("cl100k_base")
+from .integrations.helicone import HeliconeLogger
+from .integrations.aispend import AISpendLogger
+from .integrations.berrispend import BerriSpendLogger
+from .integrations.supabase import Supabase
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
@ -18,6 +22,7 @@ alerts_channel = None
heliconeLogger = None
aispendLogger = None
berrispendLogger = None
+supabaseClient = None
callback_list = []
user_logger_fn = None
additional_details = {}
@ -160,7 +165,7 @@
    return optional_params

def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
    try:
        for callback in callback_list:
            if callback == "sentry":
@ -199,16 +204,15 @@ def set_callbacks(callback_list):
                alerts_channel = os.environ["SLACK_API_CHANNEL"]
                print_verbose(f"Initialized Slack App: {slack_app}")
            elif callback == "helicone":
-                from .integrations.helicone import HeliconeLogger
                heliconeLogger = HeliconeLogger()
            elif callback == "aispend":
-                from .integrations.aispend import AISpendLogger
                aispendLogger = AISpendLogger()
            elif callback == "berrispend":
-                from .integrations.berrispend import BerriSpendLogger
                berrispendLogger = BerriSpendLogger()
-    except:
-        pass
+            elif callback == "supabase":
+                supabaseClient = Supabase()
+    except Exception as e:
+        raise e

def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
@ -287,6 +291,22 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                    }
                }
                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+            elif callback == "supabase":
+                print_verbose("reaches supabase for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                result = {
+                    "model": model,
+                    "created": time.time(),
+                    "error": traceback_exception,
+                    "usage": {
+                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
+                        "completion_tokens": 0
+                    }
+                }
+                print(f"litellm._thread_context: {litellm._thread_context}")
+                supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
        except:
            print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
            pass
@ -354,6 +374,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+            elif callback == "supabase":
+                print_verbose("reaches supabase for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                print(f"litellm._thread_context: {litellm._thread_context}")
+                supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
        except Exception as e:
            ## LOGGING
            logging(logger_fn=user_logger_fn, exception=e)
@ -369,6 +395,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass

+# integration helper function
+def modify_integration(integration_name, integration_params):
+    global supabaseClient
+    if integration_name == "supabase":
+        if "table_name" in integration_params:
+            Supabase.supabase_table_name = integration_params["table_name"]

def exception_type(model, original_exception):
    global user_logger_fn

View file

@ -14,6 +14,7 @@ nav:
  - Quick Start: advanced.md
  - Output Integrations: client_integrations.md
  - Helicone Tutorial: helicone_integration.md
+ - Supabase Tutorial: supabase_integration.md
  - BerriSpend Tutorial: berrispend_integration.md
- 💡 Support:
  - Troubleshooting & Help: troubleshoot.md

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "0.1.345"
+version = "0.1.346"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"