forked from phoenix/litellm-mirror
Merge branch 'BerriAI:main' into main
commit b89b3d8c44
102 changed files with 8852 additions and 6557 deletions
@@ -150,4 +150,20 @@ response = image_generation(
    model="bedrock/stability.stable-diffusion-xl-v0",
)
print(f"response: {response}")
```

## VertexAI - Image Generation Models

### Usage

Use this for image generation models on VertexAI.

```python
response = litellm.image_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
)
print(f"response: {response}")
```
173  docs/my-website/docs/observability/lago.md  Normal file
@@ -0,0 +1,173 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Lago - Usage Based Billing

[Lago](https://www.getlago.com/) offers a self-hosted and cloud metering and usage-based billing solution.

<Image img={require('../../img/lago.jpeg')} />

## Quick Start
Use just 1 line of code to instantly log your responses **across all providers** with Lago.

Get your Lago [API Key](https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key)

```python
litellm.callbacks = ["lago"] # logs cost + usage of successful calls to lago
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
# pip install lago
import litellm
import os

os.environ["LAGO_API_BASE"] = "" # http://0.0.0.0:3000
os.environ["LAGO_API_KEY"] = ""
os.environ["LAGO_API_EVENT_CODE"] = "" # The billable metric's code - https://docs.getlago.com/guide/events/ingesting-usage#define-a-billable-metric

# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set lago as a callback, litellm will send the data to lago
litellm.success_callback = ["lago"]

# openai call
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ],
    user="your_customer_id" # 👈 SET YOUR CUSTOMER ID HERE
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Add to Config.yaml
```yaml
model_list:
  - litellm_params:
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
      api_key: my-fake-key
      model: openai/my-fake-model
    model_name: fake-openai-endpoint

litellm_settings:
  callbacks: ["lago"] # 👈 KEY CHANGE
```

2. Start Proxy

```
litellm --config /path/to/config.yaml
```

3. Test it!

<Tabs>
<TabItem value="curl" label="Curl">

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
  "model": "fake-openai-endpoint",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ],
  "user": "your-customer-id" # 👈 SET YOUR CUSTOMER ID
}
'
```
</TabItem>
<TabItem value="openai_python" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
], user="my_customer_id") # 👈 whatever your customer id is

print(response)
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "user": "my_customer_id" # 👈 whatever your customer id is
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>


<Image img={require('../../img/lago_2.png')} />

## Advanced - Lago Logging object

This is what LiteLLM will log to Lago:

```
{
  "event": {
    "transaction_id": "<generated_unique_id>",
    "external_customer_id": <litellm_end_user_id>, # passed via `user` param in /chat/completion call - https://platform.openai.com/docs/api-reference/chat/create
    "code": os.getenv("LAGO_API_EVENT_CODE"),
    "properties": {
      "input_tokens": <number>,
      "output_tokens": <number>,
      "model": <string>,
      "response_cost": <number>, # 👈 LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473
    }
  }
}
```
@@ -71,6 +71,23 @@ response = litellm.completion(
)
print(response)
```

### Make LiteLLM Proxy use Custom `LANGSMITH_BASE_URL`

If you're using a custom LangSmith instance, you can set the `LANGSMITH_BASE_URL` environment variable to point to your instance.
For example, you can make LiteLLM Proxy log to a local LangSmith instance with this config:

```yaml
litellm_settings:
  success_callback: ["langsmith"]

environment_variables:
  LANGSMITH_BASE_URL: "http://localhost:1984"
  LANGSMITH_PROJECT: "litellm-proxy"
```

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
@@ -20,7 +20,7 @@ Use just 2 lines of code, to instantly log your responses **across all providers
Get your OpenMeter API Key from https://openmeter.cloud/meters

```python
litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls to openmeter
litellm.callbacks = ["openmeter"] # logs cost + usage of successful calls to openmeter
```
@@ -28,7 +28,7 @@ litellm.success_callback = ["openmeter"] # logs cost + usage of successful calls
<TabItem value="sdk" label="SDK">

```python
# pip install langfuse
# pip install openmeter
import litellm
import os
@@ -39,8 +39,8 @@ os.environ["OPENMETER_API_KEY"] = ""
# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["openmeter"]
# set openmeter as a callback, litellm will send the data to openmeter
litellm.callbacks = ["openmeter"]

# openai call
response = litellm.completion(
@@ -64,7 +64,7 @@ model_list:
  model_name: fake-openai-endpoint

litellm_settings:
  success_callback: ["openmeter"] # 👈 KEY CHANGE
  callbacks: ["openmeter"] # 👈 KEY CHANGE
```

2. Start Proxy
@@ -223,6 +223,32 @@ assert isinstance(

```

### Setting `anthropic-beta` Header in Requests

Pass the `extra_headers` param to litellm. All headers will be forwarded to the Anthropic API.

```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    extra_headers={"anthropic-beta": "<beta-feature-flag>"},  # placeholder: set whichever beta flag you need forwarded
)
```
### Forcing Anthropic Tool Use

If you want Claude to use a specific tool to answer the user’s question, you can do so by specifying the tool in the `tool_choice` field like so:

```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    tool_choice={"type": "tool", "name": "get_weather"},
)
```
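The snippets above assume a `tools` list is already defined. A minimal sketch of such a list in the OpenAI tool format (the `get_weather` function here is a hypothetical example, not part of the docs above):

```python
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name, e.g. San Francisco"}
                },
                "required": ["city"],
            },
        },
    }
]
```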

### Parallel Function Calling
@@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama

| Model Name | Function Call |
|----------------------|----------------------------------------------------------------------------------|
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
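For reference, the calls in the table map directly onto litellm's `completion` function. A minimal, self-contained sketch (assuming an Ollama server is running locally with the `llama3` model pulled):

```python
from litellm import completion

# stream a response from a locally running Ollama server
response = completion(
    model="ollama/llama3",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    api_base="http://localhost:11434",  # default Ollama endpoint
    stream=True,
)

for chunk in response:
    print(chunk)  # each chunk is an OpenAI-style streaming delta
```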
@@ -188,6 +188,7 @@ These also support the `OPENAI_API_BASE` environment variable, which can be used
## OpenAI Vision Models
| Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------|
| gpt-4o | `response = completion(model="gpt-4o", messages=messages)` |
| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
| gpt-4-vision-preview | `response = completion(model="gpt-4-vision-preview", messages=messages)` |
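For these models, `messages` can carry image content in the standard OpenAI format. A minimal sketch (the image URL is a placeholder):

```python
from litellm import completion

response = completion(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)
```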
@@ -508,6 +508,31 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |

## Image Generation Models

Usage

```python
response = await litellm.aimage_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
)
```

**Generating multiple images**

Use the `n` parameter to pass how many images you want generated
```python
response = await litellm.aimage_generation(
    prompt="An olympic size swimming pool",
    model="vertex_ai/imagegeneration@006",
    vertex_ai_project="adroit-crow-413218",
    vertex_ai_location="us-central1",
    n=1,
)
```

## Extra
@@ -1,4 +1,4 @@
# 🚨 Alerting
# 🚨 Alerting / Webhooks

Get alerts for:
@@ -11,7 +11,7 @@ Get alerts for:
- Daily Reports:
    - **LLM** Top 5 slowest deployments
    - **LLM** Top 5 deployments with most failed requests
- **Spend** Weekly & Monthly spend per Team, Tag


## Quick Start
@@ -61,10 +61,38 @@ curl -X GET 'http://localhost:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

## Advanced - Opting into specific alert types

## Extras
Set `alert_types` if you want to opt into only specific alert types.

### Using Discord Webhooks
```shell
general_settings:
  alerting: ["slack"]
  alert_types: ["spend_reports"]
```

All Possible Alert Types

```python
alert_types: Optional[
    List[
        Literal[
            "llm_exceptions",
            "llm_too_slow",
            "llm_requests_hanging",
            "budget_alerts",
            "db_exceptions",
            "daily_reports",
            "spend_reports",
            "cooldown_deployment",
            "new_model_added",
        ]
    ]
]
```


## Advanced - Using Discord Webhooks

Discord provides a slack compatible webhook url that you can use for alerting
@@ -96,3 +124,80 @@ environment_variables:
```

That's it! You're ready to go!

## Advanced - [BETA] Webhooks for Budget Alerts

**Note**: This is a beta feature, so the spec might change.

Set a webhook to get notified for budget alerts.

1. Setup config.yaml

Add the url to your environment. For testing, you can use a link from [here](https://webhook.site/)

```bash
export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906"
```

Add 'webhook' to config.yaml
```yaml
general_settings:
  alerting: ["webhook"] # 👈 KEY CHANGE
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Test it!

```bash
curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
--header 'Authorization: Bearer sk-1234'
```

**Expected Response**

```bash
{
  "spend": 1, # the spend for the 'event_group'
  "max_budget": 0, # the 'max_budget' set for the 'event_group'
  "token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
  "user_id": "default_user_id",
  "team_id": null,
  "user_email": null,
  "key_alias": null,
  "projected_exceeded_date": null,
  "projected_spend": null,
  "event": "budget_crossed", # Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
  "event_group": "user",
  "event_message": "User Budget: Budget Crossed"
}
```

**API Spec for Webhook Event** (a typed sketch follows the field list)

- `spend` *float*: The current spend amount for the 'event_group'.
- `max_budget` *float*: The maximum allowed budget for the 'event_group'.
- `token` *str*: A hashed value of the key, used for authentication or identification purposes.
- `user_id` *str or null*: The ID of the user associated with the event (optional).
- `team_id` *str or null*: The ID of the team associated with the event (optional).
- `user_email` *str or null*: The email of the user associated with the event (optional).
- `key_alias` *str or null*: An alias for the key associated with the event (optional).
- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for a key (optional).
- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for a key (optional).
- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
    * "budget_crossed": Indicates that the spend has exceeded the max budget.
    * "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of the budget is reached).
    * "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
    * "user": The event is related to a specific user.
    * "key": The event is related to a specific key.
    * "team": The event is related to a team.
    * "proxy": The event is related to the proxy.
- `event_message` *str*: A human-readable description of the event.
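For readers consuming this webhook programmatically, the field list above maps onto a small typed model. A minimal sketch (the class name is illustrative, not part of LiteLLM's API):

```python
from typing import Literal, Optional, TypedDict


class BudgetWebhookEvent(TypedDict):
    """Illustrative shape of the budget-alert webhook payload described above."""

    spend: float
    max_budget: float
    token: str                              # hashed key
    user_id: Optional[str]
    team_id: Optional[str]
    user_email: Optional[str]
    key_alias: Optional[str]
    projected_exceeded_date: Optional[str]  # set when 'soft_budget' is configured for the key
    projected_spend: Optional[float]
    event: Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
    event_group: Literal["user", "key", "team", "proxy"]
    event_message: str
```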
319  docs/my-website/docs/proxy/billing.md  Normal file
@@ -0,0 +1,319 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 💵 Billing

Bill internal teams and external customers for their usage.

**🚨 Requirements**
- [Setup Lago](https://docs.getlago.com/guide/self-hosted/docker#run-the-app), for usage-based billing. We recommend following [their Stripe tutorial](https://docs.getlago.com/templates/per-transaction/stripe#step-1-create-billable-metrics-for-transaction)

Steps:
- Connect the proxy to Lago
- Set the id you want to bill for (customers, internal users, teams)
- Start!

## Quick Start

Bill internal teams for their usage

### 1. Connect proxy to Lago

Set 'lago' as a callback on your proxy config.yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["lago"] # 👈 KEY CHANGE

general_settings:
  master_key: sk-1234
```

Add your Lago keys to the environment

```bash
export LAGO_API_BASE="http://localhost:3000" # self-host - https://docs.getlago.com/guide/self-hosted/docker#run-the-app
export LAGO_API_KEY="3e29d607-de54-49aa-a019-ecf585729070" # Get key - https://docs.getlago.com/guide/self-hosted/docker#find-your-api-key
export LAGO_API_EVENT_CODE="openai_tokens" # name of lago billing code
export LAGO_API_CHARGE_BY="team_id" # 👈 Charges 'team_id' attached to proxy key
```

Start proxy

```bash
litellm --config /path/to/config.yaml
```
### 2. Create Key for Internal Team

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{"team_id": "my-unique-id"}' # 👈 Internal Team's ID
```

Response Object:

```bash
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA"
}
```


### 3. Start billing!

<Tabs>
<TabItem value="curl" label="Curl">

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-tXL0wt5-lOOVK9sfY2UacA' \ # 👈 Team's Key
--data ' {
  "model": "fake-openai-endpoint",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ]
}
'
```
</TabItem>
<TabItem value="openai_python" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # 👈 Team's Key
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "sk-tXL0wt5-lOOVK9sfY2UacA" # 👈 Team's Key

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```
</TabItem>
</Tabs>
**See Results on Lago**

<Image img={require('../../img/lago_2.png')} style={{ width: '500px', height: 'auto' }} />

## Advanced - Lago Logging object

This is what LiteLLM will log to Lago:

```
{
  "event": {
    "transaction_id": "<generated_unique_id>",
    "external_customer_id": <selected_id>, # either 'end_user_id', 'user_id', or 'team_id'. Default 'end_user_id'.
    "code": os.getenv("LAGO_API_EVENT_CODE"),
    "properties": {
      "input_tokens": <number>,
      "output_tokens": <number>,
      "model": <string>,
      "response_cost": <number>, # 👈 LITELLM CALCULATED RESPONSE COST - https://github.com/BerriAI/litellm/blob/d43f75150a65f91f60dc2c0c9462ce3ffc713c1f/litellm/utils.py#L1473
    }
  }
}
```

## Advanced - Bill Customers, Internal Users

For:
- Customers (id passed via 'user' param in /chat/completion call) = 'end_user_id'
- Internal Users (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'user_id'
- Teams (id set when [creating keys](https://docs.litellm.ai/docs/proxy/virtual_keys#advanced---spend-tracking)) = 'team_id'
<Tabs>
<TabItem value="customers" label="Customer Billing">

1. Set 'LAGO_API_CHARGE_BY' to 'end_user_id'

```bash
export LAGO_API_CHARGE_BY="end_user_id"
```

2. Test it!

<Tabs>
<TabItem value="curl" label="Curl">

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ],
  "user": "my_customer_id" # 👈 whatever your customer id is
}
'
```
</TabItem>
<TabItem value="openai_sdk" label="OpenAI Python SDK">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
], user="my_customer_id") # 👈 whatever your customer id is

print(response)
```

</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "user": "my_customer_id" # 👈 whatever your customer id is
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>
</Tabs>

</TabItem>
<TabItem value="users" label="Internal User Billing">

1. Set 'LAGO_API_CHARGE_BY' to 'user_id'

```bash
export LAGO_API_CHARGE_BY="user_id"
```

2. Create a key for that user

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{"user_id": "my-unique-id"}' # 👈 Internal User's id
```

Response Object:

```bash
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA"
}
```

3. Make API Calls with that Key

```python
import openai
client = openai.OpenAI(
    api_key="sk-tXL0wt5-lOOVK9sfY2UacA", # 👈 Generated key
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```
</TabItem>
</Tabs>
@@ -25,26 +25,45 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
    def __init__(self):
        pass

    #### ASYNC ####

    async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_pre_api_call(self, model, messages, kwargs):
        pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal["completion", "embeddings"]):
    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[
        "completion",
        "text_completion",
        "embeddings",
        "image_generation",
        "moderation",
        "audio_transcription",
    ]) -> Optional[Union[dict, str, Exception]]:
        data["model"] = "my-new-model"
        return data

    async def async_post_call_failure_hook(
        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
    ):
        pass

    async def async_post_call_success_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        pass

    async def async_moderation_hook( # call made in parallel to llm api call
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal["completion", "embeddings", "image_generation"],
    ):
        pass

    async def async_post_call_streaming_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: str,
    ):
        pass

proxy_handler_instance = MyCustomHandler()
```
@@ -190,4 +209,100 @@ general_settings:

**Result**

<Image img={require('../../img/end_user_enforcement.png')}/>

## Advanced - Return rejected message as response

For chat completions and text completion calls, you can return a rejected message as a user response.

Do this by returning a string. LiteLLM takes care of returning the response in the correct format depending on the endpoint and whether it's streaming/non-streaming.

For non-chat/text completion endpoints, this response is returned as a 400 status code exception.


### 1. Create Custom Handler

```python
from typing import Literal, Optional, Union

from litellm.integrations.custom_logger import CustomLogger
import litellm
from litellm.proxy.proxy_server import UserAPIKeyAuth, DualCache  # types used in the hook signature
from litellm.utils import get_formatted_prompt

# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
    def __init__(self):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal[
        "completion",
        "text_completion",
        "embeddings",
        "image_generation",
        "moderation",
        "audio_transcription",
    ]) -> Optional[Union[dict, str, Exception]]:
        formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)

        if "Hello world" in formatted_prompt:
            return "This is an invalid response"

        return data

proxy_handler_instance = MyCustomHandler()
```

### 2. Update config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
```


### 3. Test it!

```shell
$ litellm --config /path/to/config.yaml
```
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--data ' {
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "Hello world"
    }
  ]
}'
```

**Expected Response**

```
{
  "id": "chatcmpl-d00bbede-2d90-4618-bf7b-11a1c23cf360",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "This is an invalid response.", # 👈 REJECTED RESPONSE
        "role": "assistant"
      }
    }
  ],
  "created": 1716234198,
  "model": null,
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {}
}
```
@@ -5,6 +5,8 @@
- debug (prints info logs)
- detailed debug (prints debug logs)

The proxy also supports json logs. [See here](#json-logs)

## `debug`

**via cli**
@@ -31,4 +33,20 @@ $ litellm --detailed_debug

```python
import os

os.environ["LITELLM_LOG"] = "DEBUG"
```

## JSON LOGS

Set `JSON_LOGS="True"` in your env:

```bash
export JSON_LOGS="True"
```

Start proxy

```bash
$ litellm
```

The proxy will now output all logs in JSON format.
@@ -1,7 +1,8 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ✨ Enterprise Features - Content Mod, SSO
# ✨ Enterprise Features - Content Mod, SSO, Custom Swagger

Features here are behind a commercial license in our `/enterprise` folder. [**See Code**](https://github.com/BerriAI/litellm/tree/main/enterprise)
@@ -20,6 +21,7 @@ Features:
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
@@ -526,4 +528,39 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \

<!-- ## Tracking Spend per Key

## Tracking Spend per User -->

## Swagger Docs - Custom Routes + Branding

:::info

Requires a LiteLLM Enterprise key to use. Request one [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)

:::

Set LiteLLM Key in your environment

```bash
LITELLM_LICENSE=""
```

### Customize Title + Description

In your environment, set:

```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```

### Customize Routes

Hide admin routes from users.

In your environment, set:

```bash
DOCS_FILTERED="True" # only shows openai routes to user
```

<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
BIN  docs/my-website/img/custom_swagger.png  Normal file (binary, not shown; 223 KiB)
BIN  docs/my-website/img/lago.jpeg  Normal file (binary, not shown; 344 KiB)
BIN  docs/my-website/img/lago_2.png  Normal file (binary, not shown; 219 KiB)
@@ -41,6 +41,7 @@ const sidebars = {
        "proxy/reliability",
        "proxy/cost_tracking",
        "proxy/users",
        "proxy/billing",
        "proxy/user_keys",
        "proxy/enterprise",
        "proxy/virtual_keys",
|
|||
"observability/custom_callback",
|
||||
"observability/langfuse_integration",
|
||||
"observability/sentry",
|
||||
"observability/lago",
|
||||
"observability/openmeter",
|
||||
"observability/promptlayer_integration",
|
||||
"observability/wandb_integration",
|
||||
|
|