Krrish Dholakia 2023-08-03 06:57:30 -07:00
parent 231a3e727b
commit 6ae8384fb5
14 changed files with 59 additions and 36 deletions


@@ -2,7 +2,7 @@ success_callback = []
 failure_callback = []
 set_verbose=False
 telemetry=True
+max_tokens = 256 # OpenAI Defaults
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None


@@ -189,7 +189,7 @@ def completion(
         if max_tokens != float('inf'):
             max_tokens_to_sample = max_tokens
         else:
-            max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
+            max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
         ## LOGGING
         logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
         ## COMPLETION CALL
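Combined with the new module-level `max_tokens = 256` in the first hunk, the Anthropic branch now falls back to a package-wide, user-configurable default instead of a hard-coded 300. A minimal runnable sketch of the pattern, with a `SimpleNamespace` standing in for the real `litellm` module:

```python
from types import SimpleNamespace

# Stand-in for the litellm package and its new module-level default
litellm = SimpleNamespace(max_tokens=256)  # mirrors `max_tokens = 256 # OpenAI Defaults`

def resolve_max_tokens(max_tokens=float('inf')):
    if max_tokens != float('inf'):
        return max_tokens          # caller-supplied cap wins
    return litellm.max_tokens      # configurable default (was a hard-coded 300)

assert resolve_max_tokens() == 256
assert resolve_max_tokens(1024) == 1024
litellm.max_tokens = 512           # users can now change the default globally
assert resolve_max_tokens() == 512
```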


@@ -2,6 +2,7 @@ import dotenv, json, traceback, threading
 import subprocess, os
 import litellm, openai
 import random, uuid, requests
+import datetime
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -11,6 +12,7 @@ add_breadcrumb = None
 posthog = None
 slack_app = None
 alerts_channel = None
+heliconeLogger = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
@@ -68,7 +70,7 @@ def client(original_function):
         global callback_list, add_breadcrumb
         if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
             callback_list = list(set(litellm.success_callback + litellm.failure_callback))
-            set_callbacks(callback_list=callback_list)
+            set_callbacks(callback_list=callback_list,)
         if add_breadcrumb:
             add_breadcrumb(
                 category="litellm.llm_call",
@@ -83,9 +85,11 @@
         try:
             function_setup(args, kwargs)
             ## MODEL CALL
+            start_time = datetime.datetime.now()
             result = original_function(*args, **kwargs)
+            end_time = datetime.datetime.now()
             ## LOG SUCCESS
-            my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
+            my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
             my_thread.start()
             return result
         except Exception as e:
@@ -97,7 +101,7 @@
 ####### HELPER FUNCTIONS ################
 def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
     try:
         for callback in callback_list:
             if callback == "sentry":
@@ -134,6 +138,10 @@
                 )
                 alerts_channel = os.environ["SLACK_API_CHANNEL"]
                 print_verbose(f"Initialized Slack App: {slack_app}")
+            elif callback == "helicone":
+                from .integrations.helicone import HeliconeLogger
+                heliconeLogger = HeliconeLogger()
     except:
         pass
@@ -200,7 +208,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
     except:
         pass
-def handle_success(*args, **kwargs):
+def handle_success(args, kwargs, result, start_time, end_time):
+    global heliconeLogger
     try:
         success_handler = additional_details.pop("success_handler", None)
         failure_handler = additional_details.pop("failure_handler", None)
@@ -223,6 +232,11 @@
             for detail in additional_details:
                 slack_msg += f"{detail}: {additional_details[detail]}\n"
             slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
+        elif callback == "helicone":
+            print_verbose("reaches helicone for logging!")
+            model = args[0] if len(args) > 0 else kwargs["model"]
+            messages = args[1] if len(args) > 1 else kwargs["messages"]
+            heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time)
     except:
         pass
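The `HeliconeLogger` class imported from `.integrations.helicone` is not part of this diff, so only its constructor and the `log_success(...)` signature are known from the call sites above. A hypothetical sketch consistent with those call sites (the body, including the use of `HELICONE_API_KEY`, is an assumption):

```python
import os

class HeliconeLogger:
    """Hypothetical sketch -- only __init__() and log_success()'s signature are
    visible in this commit; the real class lives in litellm/integrations/helicone.py."""

    def __init__(self):
        # Assumed: the docs added in this commit set HELICONE_API_KEY
        self.api_key = os.environ.get("HELICONE_API_KEY", "")

    def log_success(self, model, messages, response_obj, start_time, end_time):
        # A real implementation would forward these to Helicone's API; here we
        # only show what handle_success() hands over on the background thread.
        latency_s = (end_time - start_time).total_seconds()
        print(f"[helicone] model={model} n_messages={len(messages)} latency={latency_s:.3f}s")
```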

Binary files (contents not shown):
- two unnamed binary files
- dist/litellm-0.1.222-py3-none-any.whl (new, vendored)
- dist/litellm-0.1.222.tar.gz (new, vendored)


@@ -1,21 +1,27 @@
-# Advanced - liteLLM client
+# Advanced - Callbacks
-## Use liteLLM client to send Output Data to Posthog, Sentry etc
+## Use Callbacks to send Output Data to Posthog, Sentry etc
-liteLLM allows you to create `completion_client` and `embedding_client` to send successfull / error LLM API call data to Posthog, Sentry, Slack etc
+liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses.
+liteLLM supports:
+- [Helicone](https://docs.helicone.ai/introduction)
+- [Sentry](https://docs.sentry.io/platforms/python/)
+- [PostHog](https://posthog.com/docs/libraries/python)
+- [Slack](https://slack.dev/bolt-python/concepts)
 ### Quick Start
 ```python
-from main import litellm_client
+from litellm import completion
+import os
+# set callbacks
+litellm.success_callback=["posthog", "helicone"]
+litellm.failure_callback=["sentry"]
 ## set env variables
-os.environ['SENTRY_API_URL'] = ""
+os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE']= ""
 os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
+os.environ["HELICONE_API_KEY"] = ""
-# init liteLLM client
-client = litellm_client(success_callback=["posthog"], failure_callback=["sentry", "posthog"])
-completion = client.completion
-embedding = client.embedding
 response = completion(model="gpt-3.5-turbo", messages=messages)
 ```
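Note that the Quick Start as committed will not run as-is: `os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE'] = ""` tries to unpack an empty string into two targets (a `ValueError`), and `litellm` is referenced without being imported. A corrected sketch (placeholder env values, and a hypothetical `messages` since the snippet never defines one):

```python
import os
import litellm
from litellm import completion

# set callbacks
litellm.success_callback = ["posthog", "helicone"]
litellm.failure_callback = ["sentry"]

## set env variables (placeholders -- substitute real values)
os.environ['SENTRY_API_URL'] = ""
os.environ['SENTRY_API_TRACE_RATE'] = "1.0"
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # hypothetical input
response = completion(model="gpt-3.5-turbo", messages=messages)
```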


@@ -1,10 +1,11 @@
 # Data Logging Integrations
-| Integration | Required OS Variables | How to Use with litellm Client |
+| Integration | Required OS Variables | How to Use with callbacks |
 |-----------------|--------------------------------------------|-------------------------------------------|
-| Sentry | `SENTRY_API_URL` | `client = litellm_client(success_callback=["sentry"], failure_callback=["sentry"])` |
+| Sentry | `SENTRY_API_URL` | `litellm.success_callback=["sentry"], litellm.failure_callback=["sentry"]` |
-| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `client = litellm_client(success_callback=["posthog"], failure_callback=["posthog"])` |
+| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `litellm.success_callback=["posthog"], litellm.failure_callback=["posthog"]` |
-| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `client = litellm_client(success_callback=["slack"], failure_callback=["slack"])` |
+| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `litellm.success_callback=["slack"], litellm.failure_callback=["slack"]` |
+| Helicone | `HELICONE_API_TOKEN` | `litellm.success_callback=["helicone"]` |
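One thing to flag in the new Helicone row: this table lists `HELICONE_API_TOKEN`, while the Quick Start above sets `HELICONE_API_KEY`, so one of the two names is presumably a typo. A short sketch following the table's spelling:

```python
import os
import litellm
from litellm import completion

os.environ["HELICONE_API_TOKEN"] = "..."  # name per this table; the docs Quick Start uses HELICONE_API_KEY
litellm.success_callback = ["helicone"]   # log_success() then fires on a background thread after each call

response = completion(model="gpt-3.5-turbo",
                      messages=[{"role": "user", "content": "ping"}])
```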


@@ -4,7 +4,11 @@ a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic AP
 ###### litellm manages:
 * Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
 * Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
-* **[Advanced]** Automatically logging your output to Sentry, Posthog, Slack [see liteLLM Client](https://litellm.readthedocs.io/en/latest/advanced/)
+* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
+###### observability:
+* Logging - see exactly what the raw model request/response is `completion(.., logger_fn=your_logging_fn)`
+* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
 ## Quick Start
 Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
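The new "Consistent Exceptions" bullet implies one handler can cover every provider, since errors are re-raised as the `openai.error` classes that utils.py already imports. A hedged sketch (mapping context-window overflows to `InvalidRequestError` is an assumption based on OpenAI's own behavior):

```python
from litellm import completion
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError

messages = [{"role": "user", "content": "Hello"}]

# The same except clauses work whether the backend is OpenAI, Anthropic, or Cohere.
for model in ["gpt-3.5-turbo", "claude-instant-1", "command-nightly"]:
    try:
        print(completion(model=model, messages=messages))
    except AuthenticationError:
        print(f"{model}: missing or invalid API key")
    except RateLimitError:
        print(f"{model}: rate limited -- back off and retry")
    except InvalidRequestError:
        print(f"{model}: bad request (e.g. context window exceeded)")
```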


@@ -5,6 +5,8 @@
 | Model Name | Function Call | Required OS Variables |
 |------------------|----------------------------------------|--------------------------------------|
 | gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k | `completion('gpt-3.5-turbo-16k', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k-0613 | `completion('gpt-3.5-turbo-16k-0613', messages)` | `os.environ['OPENAI_API_KEY']` |
 | gpt-4 | `completion('gpt-4', messages)` | `os.environ['OPENAI_API_KEY']` |
 ## Azure OpenAI Chat Completion Models
@@ -26,16 +28,11 @@
 |------------------|--------------------------------------------|--------------------------------------|
 | command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
-### OpenRouter Models
-| Model Name | Function Call | Required OS Variables |
-|----------------------------------|----------------------------------------------------------------------|---------------------------------------------------------------------------|
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
+### Anthropic Models
+| Model Name | Function Call | Required OS Variables |
+|------------------|--------------------------------------------|--------------------------------------|
+| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
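A quick usage sketch against the new Anthropic rows (placeholder key; with no explicit `max_tokens`, the Anthropic branch now applies the `litellm.max_tokens` default of 256 introduced in this commit):

```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "..."  # placeholder

messages = [{"role": "user", "content": "What's the capital of France?"}]

# Same OpenAI-style call shape as every other provider in these tables
response = completion('claude-instant-1', messages)
print(response['choices'][0]['message']['content'])
```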


@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: litellm
-Version: 0.1.220
+Version: 0.1.222
 Summary: Library to easily interface with LLM API providers
 Author: BerriAI
 License-File: LICENSE


@@ -112,6 +112,7 @@ def set_callbacks(callback_list):
                 subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
                 import sentry_sdk
                 sentry_sdk_instance = sentry_sdk
+                sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
                 sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
                 capture_exception = sentry_sdk_instance.capture_exception
                 add_breadcrumb = sentry_sdk_instance.add_breadcrumb
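The added `sentry_trace_rate` line provides a "1.0" fallback, but the `init(...)` call shown still reads the env var directly, so `float(os.environ.get("SENTRY_API_TRACE_RATE"))` raises `TypeError` when the variable is unset; presumably the fallback was meant to be threaded through. A sketch of the equivalent, more idiomatic pattern:

```python
import os

# os.environ.get's default argument replaces the conditional expression in the diff
sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE", "1.0")

# Assumed intent (not what the shown init line does):
#   sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"),
#                            traces_sample_rate=float(sentry_trace_rate))
print(float(sentry_trace_rate))  # 1.0 when SENTRY_API_TRACE_RATE is unset
```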


@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name='litellm',
-    version='0.1.221',
+    version='0.1.222',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[