adding support for supabase integration

This commit is contained in:
Krrish Dholakia 2023-08-05 19:57:33 -07:00
parent 31ca0839e5
commit 75057c72d2
15 changed files with 259 additions and 23 deletions

View file

@@ -0,0 +1,75 @@
# Supabase Tutorial
[Supabase](https://supabase.com/) is an open source Firebase alternative.
Start your project with a Postgres database, Authentication, instant APIs, Edge Functions, Realtime subscriptions, Storage, and Vector embeddings.
## Use Supabase to see total spend across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
liteLLM provides `success_callback` and `failure_callback`, making it easy for you to send data to a particular provider depending on the status of your responses.
In this case, we want to log requests to Supabase in both scenarios - when the request succeeds and when it fails (see the callback snippet below).
### Create a Supabase table
In your Supabase project, open the [Supabase SQL Editor](https://supabase.com/dashboard/projects) and create a new table with this configuration.
Note: You can change the table name. Just don't change the column names.
```sql
create table
public.request_logs (
id bigint generated by default as identity,
created_at timestamp with time zone null default now(),
model text null default ''::text,
messages json null default '{}'::json,
response json null default '{}'::json,
end_user text null default ''::text,
error json null default '{}'::json,
response_time real null default '0'::real,
total_cost real null,
additional_details json null default '{}'::json,
constraint request_logs_pkey primary key (id)
) tablespace pg_default;
```
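Once logs start flowing in, your total spend is one query away. Here's a minimal sketch for reading it back with the [supabase-py](https://github.com/supabase/supabase-py) client - the table and column names match the schema above, and summing client-side is just one option (a SQL `sum()` aggregate works too). Depending on your supabase-py version, `execute()` may return a response object or a tuple, so treat this as illustrative:
```python
import os
from supabase import create_client

supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

# fetch the total_cost column from request_logs and sum it client-side
rows = supabase.table("request_logs").select("total_cost").execute().data
total_spend = sum(row["total_cost"] or 0 for row in rows)
print(f"Total LLM spend: ${total_spend:.4f}")
```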
### Use Callbacks
With just 2 lines of code, you can instantly see costs and log your responses **across all providers** with Supabase:
```python
litellm.success_callback=["supabase"]
litellm.failure_callback=["supabase"]
```
Complete code
```python
import os
import litellm
from litellm import completion

## set env variables
os.environ["SUPABASE_URL"] = "your-supabase-url"
os.environ["SUPABASE_KEY"] = "your-supabase-key"
os.environ["OPENAI_API_KEY"] = "your-openai-key"

# set callbacks
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]

# openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

# bad call - intentionally invalid model name, to test error logging
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad call to test error logging"}])
```
### Additional Controls
**Different Table name**
If you modified your table name, here's how to pass the new name.
```python
litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
```
**Identify end-user**
Here's how to map your LLM calls to an end-user:
```python
litellm.identify({"end_user": "krrish@berri.ai"})
```
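Putting it together: calls made after `identify` are logged with that end-user in the table's `end_user` column. A minimal sketch (the email address is illustrative; it assumes your Supabase and provider keys are already set):
```python
import litellm
from litellm import completion

litellm.success_callback = ["supabase"]
litellm.identify({"end_user": "krrish@berri.ai"})

# this request is logged to Supabase with end_user="krrish@berri.ai"
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello!"}])
```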
```

View file

@@ -1,3 +1,4 @@
import threading
success_callback = []
failure_callback = []
set_verbose=False
@@ -9,20 +10,16 @@ azure_key = None
anthropic_key = None
replicate_key = None
cohere_key = None
MAX_TOKENS = {
    'gpt-3.5-turbo': 4000,
    'gpt-3.5-turbo-0613': 4000,
    'gpt-3.5-turbo-0301': 4000,
    'gpt-3.5-turbo-16k': 16000,
    'gpt-3.5-turbo-16k-0613': 16000,
    'gpt-4': 8000,
    'gpt-4-0613': 8000,
    'gpt-4-32k': 32000,
    'claude-instant-1': 100000,
    'claude-2': 100000,
    'command-nightly': 4096,
    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
}
####### THREAD-SPECIFIC DATA ###################
class MyLocal(threading.local):
    def __init__(self):
        self.user = "Hello World"
_thread_context = MyLocal()
def identify(event_details):
    # store the user in thread-local data; accept the "end_user" key used in the docs
    if "end_user" in event_details:
        _thread_context.user = event_details["end_user"]
    elif "user" in event_details:
        _thread_context.user = event_details["user"]
####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
api_base = None
headers = None
@@ -71,6 +68,6 @@ open_ai_embedding_models = [
    'text-embedding-ada-002'
]
from .timeout import timeout
from .utils import client, logging, exception_type, get_optional_params # Import all the symbols from main.py
from .utils import client, logging, exception_type, get_optional_params, modify_integration
from .main import * # Import all the symbols from main.py
from .integrations import *
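The thread-local `_thread_context` is what lets `identify` attribute concurrent requests to different end-users without them overwriting each other - each thread sees its own `user`. A quick sketch of that isolation property (the email addresses are illustrative):
```python
import threading
import litellm

def worker(user_email):
    # each thread sets its own end-user; threading.local keeps them isolated
    litellm.identify({"end_user": user_email})
    print(threading.current_thread().name, "->", litellm._thread_context.user)

threads = [threading.Thread(target=worker, args=(f"user{i}@example.com",)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```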

View file

@@ -0,0 +1,104 @@
#### What this does ####
# On success + failure, log events to Supabase
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}
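# example: a gpt-3.5-turbo call with 100 prompt tokens and 50 completion tokens
# costs 100 * 0.0000015 + 50 * 0.000002 = $0.00025 total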
class Supabase:
    # Class variables or attributes
    supabase_table_name = "request_logs"
    def __init__(self):
        # Instance variables
        self.supabase_url = os.getenv("SUPABASE_URL")
        self.supabase_key = os.getenv("SUPABASE_KEY")
        try:
            import supabase
        except ImportError:
            # install the supabase client on first use if it's missing
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
            import supabase
        self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try and find if the model is in the model_cost map
        # else default to the average of the costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on run time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = (end_time - start_time).total_seconds()
            cost_usd_dollar = model_run_time * 0.0032
            # split the time-based cost evenly between prompt and completion
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # unknown model - fall back to the average cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:  # distinct loop variable, to avoid shadowing `model`
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
        try:
            print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            response_time = (end_time - start_time).total_seconds()
            if "choices" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj['choices'][0]['message']['content'],
                    "end_user": end_user
                }
                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
            elif "error" in response_obj:
                supabase_data_obj = {
                    "response_time": response_time,
                    "model": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj['error'],
                    "end_user": end_user
                }
                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
        except Exception:
            # traceback.print_exc()
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
            pass

View file

@@ -1,6 +1,6 @@
#### What this tests ####
# This tests if logging to the helicone integration actually works
# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
# import sys, os
# import traceback
# import pytest

View file

@@ -0,0 +1,27 @@
#### What this tests ####
# This tests if logging to the supabase integration actually works
# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]
litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
litellm.set_verbose = True
user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}]
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
#bad request call
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

View file

@@ -6,6 +6,10 @@ import datetime, time
from anthropic import Anthropic
import tiktoken
encoding = tiktoken.get_encoding("cl100k_base")
from .integrations.helicone import HeliconeLogger
from .integrations.aispend import AISpendLogger
from .integrations.berrispend import BerriSpendLogger
from .integrations.supabase import Supabase
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
@@ -18,6 +22,7 @@ alerts_channel = None
heliconeLogger = None
aispendLogger = None
berrispendLogger = None
supabaseClient = None
callback_list = []
user_logger_fn = None
additional_details = {}
@@ -160,7 +165,7 @@ def get_optional_params(
    return optional_params
def set_callbacks(callback_list):
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
    try:
        for callback in callback_list:
            if callback == "sentry":
@@ -199,16 +204,15 @@ def set_callbacks(callback_list):
                alerts_channel = os.environ["SLACK_API_CHANNEL"]
                print_verbose(f"Initialized Slack App: {slack_app}")
            elif callback == "helicone":
                from .integrations.helicone import HeliconeLogger
                heliconeLogger = HeliconeLogger()
            elif callback == "aispend":
                from .integrations.aispend import AISpendLogger
                aispendLogger = AISpendLogger()
            elif callback == "berrispend":
                from .integrations.berrispend import BerriSpendLogger
                berrispendLogger = BerriSpendLogger()
    except:
        pass
            elif callback == "supabase":
                supabaseClient = Supabase()
    except Exception as e:
        raise e
def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
@@ -287,6 +291,22 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                    }
                }
                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
            elif callback == "supabase":
                print_verbose("reaches supabase for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                result = {
                    "model": model,
                    "created": time.time(),
                    "error": traceback_exception,
                    "usage": {
                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
                        "completion_tokens": 0
                    }
                }
                print(f"litellm._thread_context: {litellm._thread_context}")
                supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
    except:
        print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
        pass
@@ -354,6 +374,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
            elif callback == "supabase":
                print_verbose("reaches supabase for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                print(f"litellm._thread_context: {litellm._thread_context}")
                supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
    except Exception as e:
        ## LOGGING
        logging(logger_fn=user_logger_fn, exception=e)
@@ -369,6 +395,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass
# integration helper function
def modify_integration(integration_name, integration_params):
    global supabaseClient
    if integration_name == "supabase":
        if "table_name" in integration_params:
            Supabase.supabase_table_name = integration_params["table_name"]
def exception_type(model, original_exception):
    global user_logger_fn

View file

@@ -14,6 +14,7 @@ nav:
  - Quick Start: advanced.md
  - Output Integrations: client_integrations.md
  - Helicone Tutorial: helicone_integration.md
  - Supabase Tutorial: supabase_integration.md
  - BerriSpend Tutorial: berrispend_integration.md
  - 💡 Support:
    - Troubleshooting & Help: troubleshoot.md

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.345"
version = "0.1.346"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"