diff --git a/docs/supabase_integration.md b/docs/supabase_integration.md
new file mode 100644
index 000000000..ff9348332
--- /dev/null
+++ b/docs/supabase_integration.md
@@ -0,0 +1,75 @@
+# Supabase Tutorial
+[Supabase](https://supabase.com/) is an open source Firebase alternative.
+Start your project with a Postgres database, Authentication, instant APIs, Edge Functions, Realtime subscriptions, Storage, and Vector embeddings.
+
+## Use Supabase to see total spend across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
+liteLLM provides `success_callback` and `failure_callback`, making it easy to send data to a particular provider depending on whether a request succeeds or fails.
+
+In this case, we want to log requests to Supabase in both scenarios - when the request succeeds and when it fails.
+
+### Create a Supabase table
+
+Go to your Supabase project, open the [Supabase SQL Editor](https://supabase.com/dashboard/projects), and create a new table with this configuration.
+
+Note: You can change the table name. Just don't change the column names.
+
+```sql
+create table
+  public.request_logs (
+    id bigint generated by default as identity,
+    created_at timestamp with time zone null default now(),
+    model text null default ''::text,
+    messages json null default '{}'::json,
+    response json null default '{}'::json,
+    end_user text null default ''::text,
+    error json null default '{}'::json,
+    response_time real null default '0'::real,
+    total_cost real null,
+    additional_details json null default '{}'::json,
+    constraint request_logs_pkey primary key (id)
+  ) tablespace pg_default;
+```
+
+### Use Callbacks
+With just two lines of code, you can instantly see costs and log your responses **across all providers** with Supabase:
+
+```python
+litellm.success_callback=["supabase"]
+litellm.failure_callback=["supabase"]
+```
+
+Complete code
+```python
+import os
+import litellm
+from litellm import completion
+
+## set env variables
+os.environ["SUPABASE_URL"] = "your-supabase-url"
+os.environ["SUPABASE_KEY"] = "your-supabase-key"
+os.environ["OPENAI_API_KEY"] = "your-openai-key"
+
+# set callbacks
+litellm.success_callback=["supabase"]
+litellm.failure_callback=["supabase"]
+
+#openai call
+response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+
+#bad call - the error is logged to Supabase before the exception is raised
+try:
+    response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad call to test error logging"}])
+except Exception:
+    pass
+```
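+
+Once a few requests have been logged, total spend can be read straight from the table. For example, this query (assuming the default `request_logs` table name) sums cost per model:
+
+```sql
+select model, sum(total_cost) as total_spend
+from public.request_logs
+group by model;
+```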
+
+### Additional Controls
+
+**Different Table name**
+If you modified your table name, here's how to pass the new name:
+
+```python
+litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
+```
+
+**Identify end-user**
+Here's how to map your llm call to an end-user:
+
+```python
+litellm.identify({"end_user": "krrish@berri.ai"})
+```
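+
+`identify` stores the end-user in thread-local data, so concurrent requests on different threads each log their own user. A minimal sketch (the thread setup and user names here are purely illustrative):
+
+```python
+import threading
+import litellm
+from litellm import completion
+
+def handle_request(end_user, prompt):
+    litellm.identify({"end_user": end_user})  # only affects the current thread
+    return completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}])
+
+# each thread's calls are logged under its own end_user
+threading.Thread(target=handle_request, args=("user-a@example.com", "Hi!")).start()
+threading.Thread(target=handle_request, args=("user-b@example.com", "Hello!")).start()
+```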
\ No newline at end of file
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 2d8bbff11..8b1e88e1f 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1,3 +1,4 @@
+import threading
 success_callback = []
 failure_callback = []
 set_verbose=False
@@ -9,20 +10,16 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
-MAX_TOKENS = {
-    'gpt-3.5-turbo': 4000,
-    'gpt-3.5-turbo-0613': 4000,
-    'gpt-3.5-turbo-0301': 4000,
-    'gpt-3.5-turbo-16k': 16000,
-    'gpt-3.5-turbo-16k-0613': 16000,
-    'gpt-4': 8000,
-    'gpt-4-0613': 8000,
-    'gpt-4-32k': 32000,
-    'claude-instant-1': 100000,
-    'claude-2': 100000,
-    'command-nightly': 4096,
-    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
-}
+####### THREAD-SPECIFIC DATA ###################
+class MyLocal(threading.local):
+    def __init__(self):
+        self.user = "Hello World"
+
+_thread_context = MyLocal()
+def identify(event_details):
+    # Store the end-user in thread local data (key matches litellm.identify({"end_user": ...}) in the docs)
+    if "end_user" in event_details:
+        _thread_context.user = event_details["end_user"]
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
@@ -71,6 +68,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params # Import all the symbols from main.py
+from .utils import client, logging, exception_type, get_optional_params, modify_integration
 from .main import * # Import all the symbols from main.py
 from .integrations import *
\ No newline at end of file
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index c047a8144..3cf0db7d0 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e1b1f2e55..6329538c6 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 5f984ca53..999be9cf9 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/__init__.cpython-311.pyc b/litellm/integrations/__pycache__/__init__.cpython-311.pyc
index 6ece0d4ef..561733ee0 100644
Binary files a/litellm/integrations/__pycache__/__init__.cpython-311.pyc and b/litellm/integrations/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/aispend.cpython-311.pyc b/litellm/integrations/__pycache__/aispend.cpython-311.pyc
index a8231afd7..fcf43e46f 100644
Binary files a/litellm/integrations/__pycache__/aispend.cpython-311.pyc and b/litellm/integrations/__pycache__/aispend.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/berrispend.cpython-311.pyc b/litellm/integrations/__pycache__/berrispend.cpython-311.pyc
index b89fd0a31..1878c8246 100644
Binary files a/litellm/integrations/__pycache__/berrispend.cpython-311.pyc and b/litellm/integrations/__pycache__/berrispend.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/supabase.cpython-311.pyc b/litellm/integrations/__pycache__/supabase.cpython-311.pyc
new file mode 100644
index 000000000..098597291
Binary files /dev/null and b/litellm/integrations/__pycache__/supabase.cpython-311.pyc differ
diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py
new file mode 100644
index 000000000..1ac28763f
--- /dev/null
+++ b/litellm/integrations/supabase.py
@@ -0,0 +1,104 @@
+#### What this does ####
+# On success + failure, log events to Supabase
+
+import dotenv, os
+import requests
+dotenv.load_dotenv() # Loading env variables using dotenv
+import traceback
+import datetime, subprocess, sys
+
+model_cost = {
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+}
+
+class Supabase:
+    # Class variables or attributes
+    supabase_table_name = "request_logs"
+    def __init__(self):
+        # Instance variables
+        self.supabase_url = os.getenv("SUPABASE_URL")
+        self.supabase_key = os.getenv("SUPABASE_KEY")
+        try:
+            import supabase
+        except ImportError:
+            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'supabase'])
+            import supabase
+        self.supabase_client = supabase.create_client(self.supabase_url, self.supabase_key)
+
+    def price_calculator(self, model, response_obj, start_time, end_time):
+        # try and find if the model is in the model_cost map
+        # else default to the average of the costs
+        prompt_tokens_cost_usd_dollar = 0
+        completion_tokens_cost_usd_dollar = 0
+        if model in model_cost:
+            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
+            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
+        elif "replicate" in model:
+            # replicate models are charged based on time
+            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
+            model_run_time = (end_time - start_time).total_seconds() # start_time/end_time are datetimes
+            cost_usd_dollar = model_run_time * 0.0032
+            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
+            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
+        else:
+            # unknown model - fall back to the average input/output cost across all known models
+            input_cost_sum = 0
+            output_cost_sum = 0
+            for cost_model in model_cost:
+                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
+                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
+            avg_input_cost = input_cost_sum / len(model_cost.keys())
+            avg_output_cost = output_cost_sum / len(model_cost.keys())
+            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
+            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
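+
+    # Worked example (illustrative): a gpt-3.5-turbo response with 10 prompt tokens and
+    # 20 completion tokens costs 10 * 0.0000015 + 20 * 0.000002 = 0.000055 USD in total.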
+
+    def log_event(self, model, messages, end_user, response_obj, start_time, end_time, print_verbose):
+        try:
+            print_verbose(f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}")
+
+            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
+            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+
+            response_time = (end_time-start_time).total_seconds()
+            if "choices" in response_obj:
+                supabase_data_obj = {
+                    "response_time": response_time,
+                    "model": response_obj["model"],
+                    "total_cost": total_cost,
+                    "messages": messages,
+                    "response": response_obj['choices'][0]['message']['content'],
+                    "end_user": end_user
+                }
+                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
+                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
+            elif "error" in response_obj:
+                supabase_data_obj = {
+                    "response_time": response_time,
+                    "model": response_obj["model"],
+                    "total_cost": total_cost,
+                    "messages": messages,
+                    "error": response_obj['error'],
+                    "end_user": end_user
+                }
+                print_verbose(f"Supabase Logging - final data object: {supabase_data_obj}")
+                data, count = self.supabase_client.table(self.supabase_table_name).insert(supabase_data_obj).execute()
+
+        except:
+            # traceback.print_exc()
+            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
+            pass
diff --git a/litellm/tests/test_berrispend_integration.py b/litellm/tests/test_berrispend_integration.py
index c93b4de5c..122c9201d 100644
--- a/litellm/tests/test_berrispend_integration.py
+++ b/litellm/tests/test_berrispend_integration.py
@@ -1,6 +1,6 @@
 #### What this tests ####
 # This tests if logging to the helicone integration actually works
-
+# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
 # import sys, os
 # import traceback
 # import pytest
diff --git a/litellm/tests/test_supabase_integration.py b/litellm/tests/test_supabase_integration.py
new file mode 100644
index 000000000..7923b967d
--- /dev/null
+++ b/litellm/tests/test_supabase_integration.py
@@ -0,0 +1,27 @@
+#### What this tests ####
+# This tests if logging to the supabase integration actually works
+import sys, os
+import traceback
+import pytest
+
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+
+litellm.success_callback = ["supabase"]
+litellm.failure_callback = ["supabase"]
+
+litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
+
+litellm.set_verbose = True
+
+user_message = "Hello, how are you?"
+messages = [{ "content": user_message,"role": "user"}]
+
+
+#openai call
+response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+
+#bad request call - intentionally fails so failure logging is exercised; wrapped so pytest doesn't mistake it for a failed test
+try:
+    response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
+except Exception:
+    pass
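+
+# Optional sanity check (sketch, assumes supabase-py is installed and SUPABASE_URL / SUPABASE_KEY are set):
+# import os, supabase
+# client = supabase.create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
+# print(client.table("litellm_logs").select("*").order("id", desc=True).limit(1).execute())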
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 076378b1d..6e1026c1f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6,6 +6,10 @@ import datetime, time
 from anthropic import Anthropic
 import tiktoken
 encoding = tiktoken.get_encoding("cl100k_base")
+from .integrations.helicone import HeliconeLogger
+from .integrations.aispend import AISpendLogger
+from .integrations.berrispend import BerriSpendLogger
+from .integrations.supabase import Supabase
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -18,6 +22,7 @@ alerts_channel = None
 heliconeLogger = None
 aispendLogger = None
 berrispendLogger = None
+supabaseClient = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
@@ -160,7 +165,7 @@ def get_optional_params(
     return optional_params
 
 def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
     try:
         for callback in callback_list:
             if callback == "sentry":
@@ -199,16 +204,15 @@
                 alerts_channel = os.environ["SLACK_API_CHANNEL"]
                 print_verbose(f"Initialized Slack App: {slack_app}")
             elif callback == "helicone":
-                from .integrations.helicone import HeliconeLogger
                 heliconeLogger = HeliconeLogger()
             elif callback == "aispend":
-                from .integrations.aispend import AISpendLogger
                 aispendLogger = AISpendLogger()
             elif callback == "berrispend":
-                from .integrations.berrispend import BerriSpendLogger
                 berrispendLogger = BerriSpendLogger()
-    except:
-        pass
+            elif callback == "supabase":
+                supabaseClient = Supabase()
+    except Exception as e:
+        raise e
 
 
 def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
@@ -287,6 +291,22 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
                     }
                 }
                 aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+            elif callback == "supabase":
+                print_verbose("reaches supabase for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                result = {
+                    "model": model,
+                    "created": time.time(),
+                    "error": traceback_exception,
+                    "usage": {
+                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
+                        "completion_tokens": 0
+                    }
+                }
+                print_verbose(f"litellm._thread_context: {litellm._thread_context}")
+                supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+
{traceback.format_exc()}") pass @@ -354,6 +374,12 @@ def handle_success(args, kwargs, result, start_time, end_time): model = args[0] if len(args) > 0 else kwargs["model"] messages = args[1] if len(args) > 1 else kwargs["messages"] berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose) + elif callback == "supabase": + print_verbose("reaches supabase for logging!") + model = args[0] if len(args) > 0 else kwargs["model"] + messages = args[1] if len(args) > 1 else kwargs["messages"] + print(f"litellm._thread_context: {litellm._thread_context}") + supabaseClient.log_event(model=model, messages=messages, end_user=litellm._thread_context.user, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose) except Exception as e: ## LOGGING logging(logger_fn=user_logger_fn, exception=e) @@ -369,6 +395,12 @@ def handle_success(args, kwargs, result, start_time, end_time): print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}") pass +# integration helper function +def modify_integration(integration_name, integration_params): + global supabaseClient + if integration_name == "supabase": + if "table_name" in integration_params: + Supabase.supabase_table_name = integration_params["table_name"] def exception_type(model, original_exception): global user_logger_fn diff --git a/mkdocs.yml b/mkdocs.yml index 2091774ae..7498ecc38 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -14,6 +14,7 @@ nav: - Quick Start: advanced.md - Output Integrations: client_integrations.md - Helicone Tutorial: helicone_integration.md + - Supabase Tutorial: supabase_integration.md - BerriSpend Tutorial: berrispend_integration.md - 💡 Support: - Troubleshooting & Help: troubleshoot.md diff --git a/pyproject.toml b/pyproject.toml index dda6e0581..4369eb619 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.345" +version = "0.1.346" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"