Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00

commit 6ae8384fb5 ("updates")
parent 231a3e727b
14 changed files with 59 additions and 36 deletions
@@ -2,7 +2,7 @@ success_callback = []
 failure_callback = []
 set_verbose=False
 telemetry=True
+max_tokens = 256 # OpenAI Defaults
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
@@ -189,7 +189,7 @@ def completion(
         if max_tokens != float('inf'):
             max_tokens_to_sample = max_tokens
         else:
-            max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
+            max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
         ## LOGGING
         logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
         ## COMPLETION CALL
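The hunk above swaps the hard-coded Anthropic default of 300 tokens for the package-level `litellm.max_tokens` setting (set to 256 in the first hunk). A minimal sketch of the resulting fallback; `DEFAULT_MAX_TOKENS` and `resolve_max_tokens_to_sample` are illustrative stand-ins, not the library's actual internals:

```python
# Sketch of the fallback behaviour introduced above; DEFAULT_MAX_TOKENS stands in
# for litellm.max_tokens and resolve_max_tokens_to_sample is a hypothetical helper.
DEFAULT_MAX_TOKENS = 256  # mirrors the max_tokens = 256 default added in the first hunk

def resolve_max_tokens_to_sample(max_tokens):
    # Anthropic requires max_tokens_to_sample, so use the caller's value if set,
    # otherwise fall back to the package-level default.
    if max_tokens != float('inf'):
        return max_tokens
    return DEFAULT_MAX_TOKENS

print(resolve_max_tokens_to_sample(float('inf')))  # 256 (package default)
print(resolve_max_tokens_to_sample(100))           # 100 (caller override)
```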
@@ -2,6 +2,7 @@ import dotenv, json, traceback, threading
 import subprocess, os
 import litellm, openai
 import random, uuid, requests
+import datetime
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -11,6 +12,7 @@ add_breadcrumb = None
 posthog = None
 slack_app = None
 alerts_channel = None
+heliconeLogger = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
@@ -68,7 +70,7 @@ def client(original_function):
         global callback_list, add_breadcrumb
         if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
             callback_list = list(set(litellm.success_callback + litellm.failure_callback))
-            set_callbacks(callback_list=callback_list)
+            set_callbacks(callback_list=callback_list,)
         if add_breadcrumb:
             add_breadcrumb(
                 category="litellm.llm_call",
@@ -83,9 +85,11 @@ def client(original_function):
         try:
             function_setup(args, kwargs)
             ## MODEL CALL
+            start_time = datetime.datetime.now()
             result = original_function(*args, **kwargs)
+            end_time = datetime.datetime.now()
             ## LOG SUCCESS
-            my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
+            my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
             my_thread.start()
             return result
         except Exception as e:
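The decorator now timestamps the model call and hands the result plus both timestamps to `handle_success` on a background thread, so logging cannot block the caller. A self-contained sketch of that pattern; `log_success` and `fake_completion` are illustrative placeholders, not litellm functions:

```python
import datetime
import threading

def log_success(args, kwargs, result, start_time, end_time):
    # Placeholder for handle_success: report how long the wrapped call took.
    print(f"call finished in {(end_time - start_time).total_seconds():.3f}s")

def client(original_function):
    def wrapper(*args, **kwargs):
        start_time = datetime.datetime.now()
        result = original_function(*args, **kwargs)   # the actual model call
        end_time = datetime.datetime.now()
        # Log on a separate thread so the main thread returns the result immediately.
        threading.Thread(target=log_success,
                         args=(args, kwargs, result, start_time, end_time)).start()
        return result
    return wrapper

@client
def fake_completion(model, messages):
    return {"choices": [{"message": {"content": "hi"}}]}

fake_completion("gpt-3.5-turbo", [{"role": "user", "content": "hey"}])
```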
@@ -97,7 +101,7 @@ def client(original_function):
 
 ####### HELPER FUNCTIONS ################
 def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
     try:
         for callback in callback_list:
             if callback == "sentry":
@@ -134,6 +138,10 @@ def set_callbacks(callback_list):
                 )
                 alerts_channel = os.environ["SLACK_API_CHANNEL"]
                 print_verbose(f"Initialized Slack App: {slack_app}")
+            elif callback == "helicone":
+                from .integrations.helicone import HeliconeLogger
+
+                heliconeLogger = HeliconeLogger()
     except:
         pass
 
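`set_callbacks` lazily imports and instantiates one logger per requested callback name. A rough sketch of that name-based dispatch, with a dummy class standing in for litellm's Helicone integration and a returned dict in place of the module-level globals:

```python
# Rough sketch only; DummyHeliconeLogger is a placeholder, and the real
# set_callbacks assigns to module globals rather than returning a dict.
class DummyHeliconeLogger:
    def __repr__(self):
        return "DummyHeliconeLogger()"

def set_callbacks(callback_list):
    loggers = {}
    for callback in callback_list:
        if callback == "helicone":
            loggers["helicone"] = DummyHeliconeLogger()
        # "sentry", "posthog" and "slack" would be wired up the same way
    return loggers

print(set_callbacks(["helicone"]))  # {'helicone': DummyHeliconeLogger()}
```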
@@ -200,7 +208,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
     except:
         pass
 
-def handle_success(*args, **kwargs):
+def handle_success(args, kwargs, result, start_time, end_time):
+    global heliconeLogger
     try:
         success_handler = additional_details.pop("success_handler", None)
         failure_handler = additional_details.pop("failure_handler", None)
@@ -223,6 +232,11 @@ def handle_success(*args, **kwargs):
                 for detail in additional_details:
                     slack_msg += f"{detail}: {additional_details[detail]}\n"
                 slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
+            elif callback == "helicone":
+                print_verbose("reaches helicone for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time)
     except:
         pass
 
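With the new signature, `handle_success` can recover `model` and `messages` whether the caller passed them positionally or as keywords, and forward them together with the response object and timestamps. A sketch of that extraction, with a dummy logger in place of the Helicone integration:

```python
import datetime

class DummyLogger:
    # Stand-in for the HeliconeLogger used above; only the call shape matters here.
    def log_success(self, model, messages, response_obj, start_time, end_time):
        print(f"logged {model} call with {len(messages)} message(s)")

def handle_success(args, kwargs, result, start_time, end_time, logger=DummyLogger()):
    # model/messages may have been given positionally or by keyword.
    model = args[0] if len(args) > 0 else kwargs["model"]
    messages = args[1] if len(args) > 1 else kwargs["messages"]
    logger.log_success(model=model, messages=messages, response_obj=result,
                       start_time=start_time, end_time=end_time)

now = datetime.datetime.now()
handle_success((), {"model": "claude-v2", "messages": [{"role": "user", "content": "hi"}]},
               {"choices": []}, now, now)
```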
BIN  dist/litellm-0.1.220-py3-none-any.whl (vendored, binary file not shown)
BIN  dist/litellm-0.1.220.tar.gz (vendored, binary file not shown)
BIN  dist/litellm-0.1.222-py3-none-any.whl (vendored, new file, binary file not shown)
BIN  dist/litellm-0.1.222.tar.gz (vendored, new file, binary file not shown)
@@ -1,21 +1,27 @@
-# Advanced - liteLLM client
+# Advanced - Callbacks
 
-## Use liteLLM client to send Output Data to Posthog, Sentry etc
-liteLLM allows you to create `completion_client` and `embedding_client` to send successfull / error LLM API call data to Posthog, Sentry, Slack etc
+## Use Callbacks to send Output Data to Posthog, Sentry etc
+liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses.
 
+liteLLM supports:
+
+- [Helicone](https://docs.helicone.ai/introduction)
+- [Sentry](https://docs.sentry.io/platforms/python/)
+- [PostHog](https://posthog.com/docs/libraries/python)
+- [Slack](https://slack.dev/bolt-python/concepts)
+
 ### Quick Start
 ```python
-from main import litellm_client
-import os
+from litellm import completion
+
+# set callbacks
+litellm.success_callback=["posthog", "helicone"]
+litellm.failure_callback=["sentry"]
 
 ## set env variables
-os.environ['SENTRY_API_URL'] = ""
+os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE']= ""
 os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
+os.environ["HELICONE_API_KEY"] = ""
-
-# init liteLLM client
-client = litellm_client(success_callback=["posthog"], failure_callback=["sentry", "posthog"])
-completion = client.completion
-embedding = client.embedding
 
 response = completion(model="gpt-3.5-turbo", messages=messages)
 ```
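The rewritten quick start assumes `litellm`, `os` and `messages` are already in scope. A fully self-contained variant, assuming only that the callback names behave as documented and that the usual provider key (e.g. `OPENAI_API_KEY`) is set; the credential values below are placeholders:

```python
import os
import litellm
from litellm import completion

# route successful calls to PostHog and Helicone, failed calls to Sentry
litellm.success_callback = ["posthog", "helicone"]
litellm.failure_callback = ["sentry"]

# placeholder credentials; use real values from each provider's dashboard
os.environ["SENTRY_API_URL"] = ""
os.environ["SENTRY_API_TRACE_RATE"] = "1.0"
os.environ["POSTHOG_API_KEY"], os.environ["POSTHOG_API_URL"] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""

messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
```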
@@ -1,10 +1,11 @@
 # Data Logging Integrations
 
-| Integration | Required OS Variables | How to Use with litellm Client |
+| Integration | Required OS Variables | How to Use with callbacks |
 |-----------------|--------------------------------------------|-------------------------------------------|
-| Sentry | `SENTRY_API_URL` | `client = litellm_client(success_callback=["sentry"], failure_callback=["sentry"])` |
-| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `client = litellm_client(success_callback=["posthog"], failure_callback=["posthog"])` |
-| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `client = litellm_client(success_callback=["slack"], failure_callback=["slack"])` |
+| Sentry | `SENTRY_API_URL` | `litellm.success_callback=["sentry"], litellm.failure_callback=["sentry"]` |
+| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `litellm.success_callback=["posthog"], litellm.failure_callback=["posthog"]` |
+| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `litellm.success_callback=["slack"], litellm.failure_callback=["slack"]` |
+| Helicone | `HELICONE_API_TOKEN` | `litellm.success_callback=["helicone"]` |
 
@@ -4,7 +4,11 @@ a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs
 ###### litellm manages:
 * Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
 * Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
-* **[Advanced]** Automatically logging your output to Sentry, Posthog, Slack [see liteLLM Client](https://litellm.readthedocs.io/en/latest/advanced/)
+* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
+
+###### observability:
+* Logging - see exactly what the raw model request/response is `completion(.., logger_fn=your_logging_fn)`
+* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
 
 ## Quick Start
 Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
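The new exceptions bullet says provider-specific errors are mapped onto OpenAI's exception classes. A hedged usage sketch under that assumption (it expects the relevant provider key, here `ANTHROPIC_API_KEY`, to be set, and relies on the pre-1.0 `openai.error` module this codebase already imports):

```python
# Sketch only: assumes the exception mapping works as the README bullet describes.
from litellm import completion
from openai.error import AuthenticationError, RateLimitError

try:
    response = completion(model="claude-instant-1",
                          messages=[{"role": "user", "content": "hi"}])
except AuthenticationError:
    print("invalid or missing ANTHROPIC_API_KEY")
except RateLimitError:
    print("rate limited by the provider, retry later")
```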
@@ -5,6 +5,8 @@
 | Model Name | Function Call | Required OS Variables |
 |------------------|----------------------------------------|--------------------------------------|
 | gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k | `completion('gpt-3.5-turbo-16k', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k-0613 | `completion('gpt-3.5-turbo-16k-0613', messages)` | `os.environ['OPENAI_API_KEY']` |
 | gpt-4 | `completion('gpt-4', messages)` | `os.environ['OPENAI_API_KEY']` |
 
 ## Azure OpenAI Chat Completion Models
@@ -26,16 +28,11 @@
 |------------------|--------------------------------------------|--------------------------------------|
 | command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
 
-### OpenRouter Models
-
-| Model Name | Function Call | Required OS Variables |
-|----------------------------------|----------------------------------------------------------------------|---------------------------------------------------------------------------|
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
+### Anthropic Models
+
+| Model Name | Function Call | Required OS Variables |
+|------------------|--------------------------------------------|--------------------------------------|
+| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: litellm
-Version: 0.1.220
+Version: 0.1.222
 Summary: Library to easily interface with LLM API providers
 Author: BerriAI
 License-File: LICENSE
@@ -112,6 +112,7 @@ def set_callbacks(callback_list):
                 subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
                 import sentry_sdk
                 sentry_sdk_instance = sentry_sdk
+                sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
                 sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
                 capture_exception = sentry_sdk_instance.capture_exception
                 add_breadcrumb = sentry_sdk_instance.add_breadcrumb
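The added line defaults the Sentry trace rate to "1.0" when `SENTRY_API_TRACE_RATE` is not set; the conditional is equivalent to a plain `os.environ.get` lookup with a default:

```python
import os

# Equivalent lookup: full tracing unless SENTRY_API_TRACE_RATE overrides it.
sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE", "1.0")
print(float(sentry_trace_rate))  # 1.0 by default
```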
setup.py (2 changes)

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='litellm',
-    version='0.1.221',
+    version='0.1.222',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[