Merge branch 'BerriAI:main' into main

Hannes Burrichter, 2024-06-16 15:15:00 +02:00, committed by GitHub
commit 1b644fa0fc
142 changed files with 19096 additions and 13214 deletions


@ -25,6 +25,11 @@ jobs:
     if: github.repository == 'BerriAI/litellm'
     runs-on: ubuntu-latest
     steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       -
         name: Set up QEMU
         uses: docker/setup-qemu-action@v3
@ -41,12 +46,14 @@ jobs:
         name: Build and push
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
       -
         name: Build and push litellm-database image
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           file: Dockerfile.database
           tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
@ -54,6 +61,7 @@ jobs:
         name: Build and push litellm-spend-logs image
         uses: docker/build-push-action@v5
         with:
+          context: .
           push: true
           file: ./litellm-js/spend-logs/Dockerfile
           tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
@ -68,6 +76,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -92,7 +102,7 @@ jobs:
       - name: Build and push Docker image
         uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           push: true
           tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
           labels: ${{ steps.meta.outputs.labels }}
@ -106,6 +116,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -128,7 +140,7 @@ jobs:
       - name: Build and push Database Docker image
         uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           file: Dockerfile.database
           push: true
           tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
@ -143,6 +155,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@ -165,7 +179,7 @@ jobs:
       - name: Build and push Database Docker image
         uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
         with:
-          context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+          context: .
           file: ./litellm-js/spend-logs/Dockerfile
           push: true
           tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
@ -176,6 +190,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_hash }}
       - name: Log in to the Container registry
         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1


@ -1,4 +1,19 @@
 repos:
+- repo: local
+  hooks:
+    # - id: mypy
+    #   name: mypy
+    #   entry: python3 -m mypy --ignore-missing-imports
+    #   language: system
+    #   types: [python]
+    #   files: ^litellm/
+    - id: isort
+      name: isort
+      entry: isort
+      language: system
+      types: [python]
+      files: litellm/.*\.py
+      exclude: ^litellm/__init__.py$
 - repo: https://github.com/psf/black
   rev: 24.2.0
   hooks:
@ -16,11 +31,10 @@ repos:
       name: Check if files match
       entry: python3 ci_cd/check_files_match.py
       language: system
-- repo: local
-  hooks:
-    - id: mypy
-      name: mypy
-      entry: python3 -m mypy --ignore-missing-imports
-      language: system
-      types: [python]
-      files: ^litellm/
+    # - id: check-file-length
+    #   name: Check file length
+    #   entry: python check_file_length.py
+    #   args: ["10000"] # set your desired maximum number of lines
+    #   language: python
+    #   files: litellm/.*\.py
+    #   exclude: ^litellm/tests/

check_file_length.py (new file, 28 lines)

@ -0,0 +1,28 @@
import sys
def check_file_length(max_lines, filenames):
bad_files = []
for filename in filenames:
with open(filename, "r") as file:
lines = file.readlines()
if len(lines) > max_lines:
bad_files.append((filename, len(lines)))
return bad_files
if __name__ == "__main__":
max_lines = int(sys.argv[1])
filenames = sys.argv[2:]
bad_files = check_file_length(max_lines, filenames)
if bad_files:
bad_files.sort(
key=lambda x: x[1], reverse=True
) # Sort files by length in descending order
for filename, length in bad_files:
print(f"{filename}: {length} lines")
sys.exit(1)
else:
sys.exit(0)
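A minimal usage sketch for the new script above; the file paths are illustrative, and the function can also be imported directly when experimenting from the repo root:

```python
# Illustrative invocation (pre-commit would normally pass the limit followed by the staged files):
#   python check_file_length.py 10000 litellm/utils.py litellm/main.py

from check_file_length import check_file_length  # assumes the repo root is on sys.path

too_long = check_file_length(max_lines=10000, filenames=["litellm/utils.py"])
for filename, length in too_long:
    print(f"{filename} has {length} lines (limit is 10000)")
```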


@ -162,7 +162,7 @@ def completion(
 - `function`: *object* - Required.
-- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type: "function", "function": {"name": "my_function"}} forces the model to call that function.
+- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function.
 - `none` is the default when no functions are present. `auto` is the default if functions are present.


@ -1,90 +0,0 @@
import Image from '@theme/IdealImage';
import QueryParamReader from '../../src/components/queryParamReader.js'
# [Beta] Monitor Logs in Production
:::note
This is in beta. Expect frequent updates, as we improve based on your feedback.
:::
LiteLLM provides an integration to let you monitor logs in production.
👉 Jump to our sample LiteLLM Dashboard: https://admin.litellm.ai/
<Image img={require('../../img/alt_dashboard.png')} alt="Dashboard" />
## Debug your first logs
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_OpenAI.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
### 1. Get your LiteLLM Token
Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet with your unique token
<Image img={require('../../img/hosted_debugger_usage_page.png')} alt="Usage" />
### 2. Set up your environment
**Add it to your .env**
```python
import os
os.env["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
```
**Turn on LiteLLM Client**
```python
import litellm
litellm.client = True
```
### 3. Make a normal `completion()` call
```python
import litellm
from litellm import completion
import os
# set env variables
os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
os.environ["OPENAI_API_KEY"] = "openai key"
litellm.use_client = True # enable logging dashboard
messages = [{ "content": "Hello, how are you?","role": "user"}]
# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
```
Your `completion()` call prints a link to your session dashboard (https://admin.litellm.ai/<your_unique_token>)
In the above case it would be: [`admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb`](https://admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb)
Click on your personal dashboard link. Here's how you can find it 👇
<Image img={require('../../img/dash_output.png')} alt="Dashboard" />
[👋 Tell us if you need better privacy controls](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version?month=2023-08)
### 3. Review request log
Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.
Ah! So we can see that this request was made to a **Baseten** (see litellm_params > custom_llm_provider) for a model with ID - **7qQNLDB** (see model). The message sent was - `"Hey, how's it going?"` and the response received was - `"As an AI language model, I don't have feelings or emotions, but I can assist you with your queries. How can I assist you today?"`
<Image img={require('../../img/dashboard_log.png')} alt="Dashboard Log Row" />
:::info
🎉 Congratulations! You've successfully debugged your first log!
:::


@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
 # Promptlayer Tutorial
 Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.


@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 # OpenAI (Text Completion)
 LiteLLM supports OpenAI text completion models


@ -208,7 +208,7 @@ print(response)
 Instead of using the `custom_llm_provider` arg to specify which provider you're using (e.g. together ai), you can just pass the provider name as part of the model name, and LiteLLM will parse it out.
-Expected format: <custom_llm_provider>/<model_name>
+Expected format: `<custom_llm_provider>/<model_name>`
 e.g. completion(model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct", ...)
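A short, hedged sketch of the provider-prefix format described above, assuming the relevant provider key (e.g. TOGETHERAI_API_KEY) is already set in the environment:

```python
from litellm import completion

# The "together_ai/" prefix tells LiteLLM which provider to route to;
# no custom_llm_provider argument is needed.
response = completion(
    model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```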


@ -32,40 +32,32 @@ Get a slack webhook url from https://api.slack.com/messaging/webhooks
 You can also use Discord Webhooks, see [here](#using-discord-webhooks)
-### Step 2: Update config.yaml
-- Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
-- Just for testing purposes, let's save a bad key to our proxy.
+Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
+```bash
+export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
+```
+### Step 2: Setup Proxy
 ```yaml
-model_list:
-  model_name: "azure-model"
-  litellm_params:
-    model: "azure/gpt-35-turbo"
-    api_key: "my-bad-key" # 👈 bad key
 general_settings:
   alerting: ["slack"]
   alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
-environment_variables:
-  SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/<>/<>/<>"
-  SLACK_DAILY_REPORT_FREQUENCY: "86400" # 24 hours; Optional: defaults to 12 hours
 ```
-Start proxy
+### Step 3: Start proxy
 ```bash
 $ litellm --config /path/to/config.yaml
 ```
-## Testing Alerting is Setup Correctly
-Make a GET request to `/health/services`, expect to see a test slack alert in your provided webhook slack channel
-```shell
-curl -X GET 'http://localhost:4000/health/services?service=slack' \
+### Step 3: Test it!
+```bash
+curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
 -H 'Authorization: Bearer sk-1234'
 ```
@ -84,7 +76,34 @@ litellm_settings:
```
## Advanced - Add Metadata to alerts
Add alerting metadata to proxy calls for debugging.
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [],
extra_body={
"metadata": {
"alerting_metadata": {
"hello": "world"
}
}
}
)
```
**Expected Response**
<Image img={require('../../img/alerting_metadata.png')}/>
## Advanced - Opting into specific alert types


@ -669,7 +669,7 @@ Once the stack is created, get the DatabaseURL of the Database resource, copy th
 #### 3. Connect to the EC2 Instance and deploy litellm on the EC2 container
 From the EC2 console, connect to the instance created by the stack (e.g., using SSH).
-Run the following command, replacing <database_url> with the value you copied in step 2
+Run the following command, replacing `<database_url>` with the value you copied in step 2
 ```shell
 docker run --name litellm-proxy \


@ -5,6 +5,7 @@ import Image from '@theme/IdealImage';
 Send an Email to your users when:
 - A Proxy API Key is created for them
 - Their API Key crosses its Budget
+- All Team members of a LiteLLM Team -> when the team crosses its budget
 <Image img={require('../../img/email_notifs.png')} style={{ width: '500px' }}/>


@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 # Model Management
 Add new models + Get model info without restarting proxy.


@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
 # LiteLLM Proxy Performance
 ### Throughput - 30% Increase


@ -409,6 +409,28 @@ print(response)
</Tabs>
### Content Policy Fallbacks
Fallback across providers (e.g. from Azure OpenAI to Anthropic) if you hit content policy violation errors.
```yaml
model_list:
- model_name: gpt-3.5-turbo-small
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
- model_name: claude-opus
litellm_params:
model: claude-3-opus-20240229
api_key: os.environ/ANTHROPIC_API_KEY
litellm_settings:
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}]
```
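A hedged sketch of exercising the fallback config above through a locally running proxy, using the same OpenAI-client pattern shown elsewhere in these docs; the key and base URL are placeholders:

```python
import openai

# Placeholder key/base URL for a local LiteLLM proxy started with the config above.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# If the "gpt-3.5-turbo-small" deployment raises a content policy violation,
# the proxy should retry against "claude-opus" per content_policy_fallbacks.
response = client.chat.completions.create(
    model="gpt-3.5-turbo-small",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```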
### EU-Region Filtering (Pre-Call Checks)
**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.


@ -46,7 +46,7 @@ Possible values for `budget_duration`
 | `budget_duration="1m"` | every 1 min |
 | `budget_duration="1h"` | every 1 hour |
 | `budget_duration="1d"` | every 1 day |
-| `budget_duration="1mo"` | start of every month |
+| `budget_duration="1mo"` | every 1 month |
 ### 2. Create a key for the `team`


@ -413,6 +413,52 @@ curl 'http://0.0.0.0:4000/key/generate' \
</TabItem>
</Tabs>
### Reset Budgets
Reset budgets across keys/internal users/teams/customers
`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
<Tabs>
<TabItem value="users" label="Internal Users">
```bash
curl 'http://0.0.0.0:4000/user/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="keys" label="Keys">
```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="teams" label="Teams">
```bash
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"max_budget": 10,
"budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
</Tabs>
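For reference, the same `/team/new` call sketched with Python's `requests` library; the master key is a placeholder, and note that `budget_duration` is sent as a JSON string:

```python
import requests

# Placeholder base URL and master key for a local LiteLLM proxy.
resp = requests.post(
    "http://0.0.0.0:4000/team/new",
    headers={
        "Authorization": "Bearer <your-master-key>",
        "Content-Type": "application/json",
    },
    json={"max_budget": 10, "budget_duration": "10s"},  # budget resets every 10 seconds
)
print(resp.json())
```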
## Set Rate Limits
You can set:


@ -790,84 +790,204 @@ If the error is a context window exceeded error, fall back to a larger model gro
 Fallbacks are done in-order - ["gpt-3.5-turbo, "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.
-You can also set 'default_fallbacks', in case a specific model group is misconfigured / bad.
+You can also set `default_fallbacks`, in case a specific model group is misconfigured / bad.
There are 3 types of fallbacks:
- `content_policy_fallbacks`: For litellm.ContentPolicyViolationError - LiteLLM maps content policy violation errors across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8495C27-L8495C54)
- `context_window_fallbacks`: For litellm.ContextWindowExceededErrors - LiteLLM maps context window error messages across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8469)
- `fallbacks`: For all remaining errors - e.g. litellm.RateLimitError
**Content Policy Violation Fallback**
Key change:
```python
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
 ```python
 from litellm import Router
-model_list=[
-    { # list of model deployments
-        "model_name": "azure/gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    { # list of model deployments
-        "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    {
-        "model_name": "azure/gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-functioncalling",
-            "api_key": "bad-key",
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE")
-        },
-        "tpm": 240000,
-        "rpm": 1800
-    },
-    {
-        "model_name": "gpt-3.5-turbo", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    },
-    {
-        "model_name": "gpt-3.5-turbo-16k", # openai model name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo-16k",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-        "tpm": 1000000,
-        "rpm": 9000
-    }
-]
-router = Router(model_list=model_list,
-                fallbacks=[{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}],
-                default_fallbacks=["gpt-3.5-turbo-16k"],
-                context_window_fallbacks=[{"azure/gpt-3.5-turbo-context-fallback": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
-                set_verbose=True)
-user_message = "Hello, whats the weather in San Francisco??"
-messages = [{"content": user_message, "role": "user"}]
-# normal fallback call
-response = router.completion(model="azure/gpt-3.5-turbo", messages=messages)
-# context window fallback call
-response = router.completion(model="azure/gpt-3.5-turbo-context-fallback", messages=messages)
-print(f"response: {response}")
+router = Router(
+    model_list=[
+        {
+            "model_name": "claude-2",
+            "litellm_params": {
+                "model": "claude-2",
+                "api_key": "",
+                "mock_response": Exception("content filtering policy"),
+            },
+        },
+        {
+            "model_name": "my-fallback-model",
+            "litellm_params": {
+                "model": "claude-2",
+                "api_key": "",
+                "mock_response": "This works!",
+            },
+        },
+    ],
+    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
+    # fallbacks=[..], # [OPTIONAL]
+    # context_window_fallbacks=[..], # [OPTIONAL]
+)
+response = router.completion(
+    model="claude-2",
+    messages=[{"role": "user", "content": "Hey, how's it going?"}],
+)
 ```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
**Context Window Exceeded Fallback**
Key change:
```python
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(
model_list=[
{
"model_name": "claude-2",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": Exception("prompt is too long"),
},
},
{
"model_name": "my-fallback-model",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": "This works!",
},
},
],
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
# fallbacks=[..], # [OPTIONAL]
# content_policy_fallbacks=[..], # [OPTIONAL]
)
response = router.completion(
model="claude-2",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
**Regular Fallbacks**
Key change:
```python
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(
model_list=[
{
"model_name": "claude-2",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": Exception("this is a rate limit error"),
},
},
{
"model_name": "my-fallback-model",
"litellm_params": {
"model": "claude-2",
"api_key": "",
"mock_response": "This works!",
},
},
],
fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
# context_window_fallbacks=[..], # [OPTIONAL]
# content_policy_fallbacks=[..], # [OPTIONAL]
)
response = router.completion(
model="claude-2",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
In your proxy config.yaml just add this line 👇
```yaml
router_settings:
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```
Start proxy
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
### Caching


@ -23,9 +23,13 @@ https://api.together.xyz/playground/chat?model=togethercomputer%2Fllama-2-70b-ch
 model_name = "together_ai/togethercomputer/llama-2-70b-chat"
 response = completion(model=model_name, messages=messages)
 print(response)
+```
 ```
 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': "\n\nI'm not able to provide real-time weather information. However, I can suggest"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}
+```
 LiteLLM handles the prompt formatting for Together AI's Llama2 models as well, converting your message to the

Binary file not shown (new image, 207 KiB).

File diff suppressed because it is too large.


@ -14,17 +14,17 @@
   "write-heading-ids": "docusaurus write-heading-ids"
 },
 "dependencies": {
-  "@docusaurus/core": "2.4.1",
-  "@docusaurus/plugin-google-gtag": "^2.4.1",
-  "@docusaurus/plugin-ideal-image": "^2.4.1",
-  "@docusaurus/preset-classic": "2.4.1",
+  "@docusaurus/core": "3.1.1",
+  "@docusaurus/plugin-google-gtag": "3.1.1",
+  "@docusaurus/plugin-ideal-image": "3.1.1",
+  "@docusaurus/preset-classic": "3.1.1",
   "@mdx-js/react": "^1.6.22",
   "clsx": "^1.2.1",
   "docusaurus": "^1.14.7",
   "docusaurus-lunr-search": "^2.4.1",
   "prism-react-renderer": "^1.3.5",
-  "react": "^17.0.2",
-  "react-dom": "^17.0.2",
+  "react": "^18.1.0",
+  "react-dom": "^18.1.0",
   "sharp": "^0.32.6",
   "uuid": "^9.0.1"
 },

File diff suppressed because it is too large.


@ -122,236 +122,6 @@ async def ui_get_spend_by_tags(
return {"spend_per_tag": ui_tags}
async def view_spend_logs_from_clickhouse(
api_key=None, user_id=None, request_id=None, start_date=None, end_date=None
):
verbose_logger.debug("Reading logs from Clickhouse")
import os
# if user has setup clickhouse
# TODO: Move this to be a helper function
# querying clickhouse for this data
import clickhouse_connect
from datetime import datetime
port = os.getenv("CLICKHOUSE_PORT")
if port is not None and isinstance(port, str):
port = int(port)
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
port=port,
username=os.getenv("CLICKHOUSE_USERNAME", ""),
password=os.getenv("CLICKHOUSE_PASSWORD", ""),
)
if (
start_date is not None
and isinstance(start_date, str)
and end_date is not None
and isinstance(end_date, str)
):
# Convert the date strings to datetime objects
start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
# get top spend per day
response = client.query(
f"""
SELECT
toDate(startTime) AS day,
sum(spend) AS total_spend
FROM
spend_logs
WHERE
toDate(startTime) BETWEEN toDate('2024-02-01') AND toDate('2024-02-29')
GROUP BY
day
ORDER BY
total_spend
"""
)
results = []
result_rows = list(response.result_rows)
for response in result_rows:
current_row = {}
current_row["users"] = {"example": 0.0}
current_row["models"] = {}
current_row["spend"] = float(response[1])
current_row["startTime"] = str(response[0])
# stubbed api_key
current_row[""] = 0.0 # type: ignore
results.append(current_row)
return results
else:
# check if spend logs exist, if it does then return last 10 logs, sorted in descending order of startTime
response = client.query(
"""
SELECT
*
FROM
default.spend_logs
ORDER BY
startTime DESC
LIMIT
10
"""
)
# get size of spend logs
num_rows = client.query("SELECT count(*) FROM default.spend_logs")
num_rows = num_rows.result_rows[0][0]
# safely access num_rows.result_rows[0][0]
if num_rows is None:
num_rows = 0
raw_rows = list(response.result_rows)
response_data = {
"logs": raw_rows,
"log_count": num_rows,
}
return response_data
def _create_clickhouse_material_views(client=None, table_names=[]):
# Create Materialized Views if they don't exist
# Materialized Views send new inserted rows to the aggregate tables
verbose_logger.debug("Clickhouse: Creating Materialized Views")
if "daily_aggregated_spend_per_model_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
TO daily_aggregated_spend_per_model
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
model as model
FROM spend_logs
GROUP BY
day, model
"""
)
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
verbose_logger.debug(
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
)
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
TO daily_aggregated_spend_per_api_key
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
api_key as api_key
FROM spend_logs
GROUP BY
day, api_key
"""
)
if "daily_aggregated_spend_per_user_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
TO daily_aggregated_spend_per_user
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
user as user
FROM spend_logs
GROUP BY
day, user
"""
)
if "daily_aggregated_spend_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
TO daily_aggregated_spend
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend
FROM spend_logs
GROUP BY
day
"""
)
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
# Basic Logging works without this - this is only used for low latency reporting apis
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
# Create Aggregeate Tables if they don't exist
if "daily_aggregated_spend_per_model" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_model
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`model` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, model);
"""
)
if "daily_aggregated_spend_per_api_key" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_api_key
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`api_key` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, api_key);
"""
)
if "daily_aggregated_spend_per_user" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_user
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`user` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, user);
"""
)
if "daily_aggregated_spend" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
client.command(
"""
CREATE TABLE daily_aggregated_spend
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
)
ENGINE = SummingMergeTree()
ORDER BY (day);
"""
)
return
def _forecast_daily_cost(data: list):
import requests  # type: ignore
from datetime import datetime, timedelta


@ -240,6 +240,7 @@ num_retries: Optional[int] = None # per model endpoint
 default_fallbacks: Optional[List] = None
 fallbacks: Optional[List] = None
 context_window_fallbacks: Optional[List] = None
+content_policy_fallbacks: Optional[List] = None
 allowed_fails: int = 0
 num_retries_per_request: Optional[int] = (
     None  # for the request overall (incl. fallbacks + model retries)
@ -714,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"]
 from .timeout import timeout
 from .cost_calculator import completion_cost
+from litellm.litellm_core_utils.litellm_logging import Logging
 from .utils import (
     client,
     exception_type,
@ -722,12 +724,10 @@ from .utils import (
     token_counter,
     create_pretrained_tokenizer,
     create_tokenizer,
-    cost_per_token,
     supports_function_calling,
     supports_parallel_function_calling,
     supports_vision,
     get_litellm_params,
-    Logging,
     acreate,
     get_model_list,
     get_max_tokens,
@ -747,9 +747,10 @@ from .utils import (
     get_first_chars_messages,
     ModelResponse,
     ImageResponse,
-    ImageObject,
     get_provider_fields,
 )
+from .types.utils import ImageObject
 from .llms.huggingface_restapi import HuggingfaceConfig
 from .llms.anthropic import AnthropicConfig
 from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
@ -826,4 +827,4 @@ from .router import Router
 from .assistants.main import *
 from .batches.main import *
 from .scheduler import *
-from .cost_calculator import response_cost_calculator
+from .cost_calculator import response_cost_calculator, cost_per_token


@ -3,10 +3,17 @@ from logging import Formatter
 import traceback
 set_verbose = False
+if set_verbose is True:
+    logging.warning(
+        "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+    )
 json_logs = bool(os.getenv("JSON_LOGS", False))
 # Create a handler for the logger (you may need to adapt this based on your needs)
+log_level = os.getenv("LITELLM_LOG", "ERROR")
+numeric_level: str = getattr(logging, log_level.upper())
 handler = logging.StreamHandler()
-handler.setLevel(logging.DEBUG)
+handler.setLevel(numeric_level)
 class JsonFormatter(Formatter):
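A small sketch of how the new `LITELLM_LOG` lookup above is meant to be used, per the deprecation notice it prints; the value here is illustrative:

```python
import os

# Set the level before importing litellm so the stream handler picks it up
# (defaults to ERROR when LITELLM_LOG is unset).
os.environ["LITELLM_LOG"] = "DEBUG"

import litellm  # noqa: E402 - imported after the env var on purpose
```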


@ -1,6 +1,6 @@
 # What is this?
 ## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Tuple
 import litellm._logging
 from litellm.utils import (
     ModelResponse,
@ -9,7 +9,6 @@ from litellm.utils import (
     TranscriptionResponse,
     TextCompletionResponse,
     CallTypes,
-    cost_per_token,
     print_verbose,
     CostPerToken,
     token_counter,
@ -18,6 +17,224 @@ import litellm
 from litellm import verbose_logger
def _cost_per_token_custom_pricing_helper(
prompt_tokens=0,
completion_tokens=0,
response_time_ms=None,
### CUSTOM PRICING ###
custom_cost_per_token: Optional[CostPerToken] = None,
custom_cost_per_second: Optional[float] = None,
) -> Optional[Tuple[float, float]]:
"""Internal helper function for calculating cost, if custom pricing given"""
if custom_cost_per_token is None and custom_cost_per_second is None:
return None
if custom_cost_per_token is not None:
input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
return input_cost, output_cost
elif custom_cost_per_second is not None:
output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore
return 0, output_cost
return None
def cost_per_token(
model: str = "",
prompt_tokens=0,
completion_tokens=0,
response_time_ms=None,
custom_llm_provider=None,
region_name=None,
### CUSTOM PRICING ###
custom_cost_per_token: Optional[CostPerToken] = None,
custom_cost_per_second: Optional[float] = None,
) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
Parameters:
model (str): The name of the model to use. Default is ""
prompt_tokens (int): The number of tokens in the prompt.
completion_tokens (int): The number of tokens in the completion.
response_time (float): The amount of time, in milliseconds, it took the call to complete.
custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
Returns:
tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
"""
if model is None:
raise Exception("Invalid arg. Model cannot be none.")
## CUSTOM PRICING ##
response_cost = _cost_per_token_custom_pricing_helper(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
response_time_ms=response_time_ms,
custom_cost_per_second=custom_cost_per_second,
custom_cost_per_token=custom_cost_per_token,
)
if response_cost is not None:
return response_cost[0], response_cost[1]
# given
prompt_tokens_cost_usd_dollar: float = 0
completion_tokens_cost_usd_dollar: float = 0
model_cost_ref = litellm.model_cost
model_with_provider = model
if custom_llm_provider is not None:
model_with_provider = custom_llm_provider + "/" + model
if region_name is not None:
model_with_provider_and_region = (
f"{custom_llm_provider}/{region_name}/{model}"
)
if (
model_with_provider_and_region in model_cost_ref
): # use region based pricing, if it's available
model_with_provider = model_with_provider_and_region
model_without_prefix = model
model_parts = model.split("/")
if len(model_parts) > 1:
model_without_prefix = model_parts[1]
else:
model_without_prefix = model
"""
Code block that formats model to lookup in litellm.model_cost
Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
Option2. model = "openai/gpt-4" - model = provider/model
Option3. model = "anthropic.claude-3" - model = model
"""
if (
model_with_provider in model_cost_ref
): # Option 2. use model with provider, model = "openai/gpt-4"
model = model_with_provider
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
model = model
elif (
model_without_prefix in model_cost_ref
): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
model = model_without_prefix
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
print_verbose(f"Looking up model={model} in model_cost_map")
if model in model_cost_ref:
print_verbose(f"Success: model={model} in model_cost_map")
print_verbose(
f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
)
if (
model_cost_ref[model].get("input_cost_per_token", None) is not None
and model_cost_ref[model].get("output_cost_per_token", None) is not None
):
## COST PER TOKEN ##
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
elif (
model_cost_ref[model].get("output_cost_per_second", None) is not None
and response_time_ms is not None
):
print_verbose(
f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
)
## COST PER SECOND ##
prompt_tokens_cost_usd_dollar = 0
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_second"]
* response_time_ms
/ 1000
)
elif (
model_cost_ref[model].get("input_cost_per_second", None) is not None
and response_time_ms is not None
):
print_verbose(
f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
)
## COST PER SECOND ##
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
)
completion_tokens_cost_usd_dollar = 0.0
print_verbose(
f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:gpt-3.5-turbo" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:davinci-002" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:davinci-002:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif "ft:babbage-002" in model:
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
# fuzzy match ft:babbage-002:abcd-id-cool-litellm
prompt_tokens_cost_usd_dollar = (
model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
* completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif model in litellm.azure_llms:
verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
model = litellm.azure_llms[model]
verbose_logger.debug(
f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
)
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
verbose_logger.debug(
f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
elif model in litellm.azure_embedding_models:
verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
model = litellm.azure_embedding_models[model]
prompt_tokens_cost_usd_dollar = (
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
)
completion_tokens_cost_usd_dollar = (
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
)
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
else:
# if model is not in model_prices_and_context_window.json. Raise an exception-let users know
error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
raise litellm.exceptions.NotFoundError( # type: ignore
message=error_str,
model=model,
llm_provider="",
)
# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name) -> str:
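A minimal usage sketch for the relocated `cost_per_token` helper above (re-exported from the top-level package per the `__init__.py` change earlier in this diff); the token counts are made up:

```python
import litellm

# Returns (prompt_cost_usd, completion_cost_usd) for a model present in litellm.model_cost;
# unknown models raise litellm.exceptions.NotFoundError.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=100,
    completion_tokens=50,
)
print(prompt_cost, completion_cost)
```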


@ -324,7 +324,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
         message,
         model,
         llm_provider,
-        response: httpx.Response,
+        response: Optional[httpx.Response] = None,
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
@ -332,11 +332,13 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
+        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+        self.response = response or httpx.Response(status_code=400, request=request)
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
-            response=response,
+            response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs
@ -407,7 +409,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
         message,
         model,
         llm_provider,
-        response: httpx.Response,
+        response: Optional[httpx.Response] = None,
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
@ -415,11 +417,13 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
+        request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+        self.response = response or httpx.Response(status_code=500, request=request)
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
-            response=response,
+            response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs
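A small sketch of what the change above enables: these errors can now be constructed (e.g. in tests or mocked fallbacks) without an `httpx.Response`, since a default one is attached; values are illustrative and assume the exception classes remain importable from the top-level `litellm` package:

```python
import litellm

# No `response=` argument required any more; a default httpx.Response is synthesized.
err = litellm.ContextWindowExceededError(
    message="prompt is too long",
    model="claude-2",
    llm_provider="anthropic",
)
print(err.status_code, err.response.status_code)  # 400 400
```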


@ -226,14 +226,6 @@ def _start_clickhouse():
response = client.query("DESCRIBE default.spend_logs")
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
# RUN Enterprise Clickhouse Setup
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
class ClickhouseLogger:
# Class variables or attributes


@ -0,0 +1,136 @@
"""
Functions for sending Email Alerts
"""
import os
from typing import Optional, List
from litellm.proxy._types import WebhookEvent
import asyncio
from litellm._logging import verbose_logger, verbose_proxy_logger
# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"
async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
verbose_logger.debug(
"Email Alerting: Getting all team members for team_id=%s", team_id
)
if team_id is None:
return []
from litellm.proxy.proxy_server import premium_user, prisma_client
if prisma_client is None:
raise Exception("Not connected to DB!")
team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={
"team_id": team_id,
}
)
if team_row is None:
return []
_team_members = team_row.members_with_roles
verbose_logger.debug(
"Email Alerting: Got team members for team_id=%s Team Members: %s",
team_id,
_team_members,
)
_team_member_user_ids: List[str] = []
for member in _team_members:
if member and isinstance(member, dict) and member.get("user_id") is not None:
_team_member_user_ids.append(member.get("user_id"))
sql_query = """
SELECT user_email
FROM "LiteLLM_UserTable"
WHERE user_id = ANY($1::TEXT[]);
"""
_result = await prisma_client.db.query_raw(sql_query, _team_member_user_ids)
verbose_logger.debug("Email Alerting: Got all Emails for team, emails=%s", _result)
if _result is None:
return []
emails = []
for user in _result:
if user and isinstance(user, dict) and user.get("user_email", None) is not None:
emails.append(user.get("user_email"))
return emails
async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
"""
Send an Email Alert to All Team Members when the Team Budget is crossed
Returns -> True if sent, False if not.
"""
from litellm.proxy.utils import send_email
from litellm.proxy.proxy_server import premium_user, prisma_client
_team_id = webhook_event.team_id
team_alias = webhook_event.team_alias
verbose_logger.debug(
"Email Alerting: Sending Team Budget Alert for team=%s", team_alias
)
email_logo_url = os.getenv("SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None))
email_support_contact = os.getenv("EMAIL_SUPPORT_CONTACT", None)
# await self._check_if_using_premium_email_feature(
# premium_user, email_logo_url, email_support_contact
# )
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
recipient_emails = await get_all_team_member_emails(_team_id)
recipient_emails_str: str = ",".join(recipient_emails)
verbose_logger.debug(
"Email Alerting: Sending team budget alert to %s", recipient_emails_str
)
event_name = webhook_event.event_message
max_budget = webhook_event.max_budget
email_html_content = "Alert from LiteLLM Server"
if recipient_emails_str is None:
verbose_proxy_logger.error(
"Email Alerting: Trying to send email alert to no recipient, got recipient_emails=%s",
recipient_emails_str,
)
email_html_content = f"""
<img src="{email_logo_url}" alt="LiteLLM Logo" width="150" height="50" /> <br/><br/><br/>
Budget Crossed for Team <b> {team_alias} </b> <br/> <br/>
Your Teams LLM API usage has crossed it's <b> budget of ${max_budget} </b>, current spend is <b>${webhook_event.spend}</b><br /> <br />
API requests will be rejected until either (a) you increase your budget or (b) your budget gets reset <br /> <br />
If you have any questions, please send an email to {email_support_contact} <br /> <br />
Best, <br />
The LiteLLM team <br />
"""
email_event = {
"to": recipient_emails_str,
"subject": f"LiteLLM {event_name} for Team {team_alias}",
"html": email_html_content,
}
await send_email(
receiver_email=email_event["to"],
subject=email_event["subject"],
html=email_event["html"],
)
return False


@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
messages = "Message not logged. litellm.redact_messages_in_exceptions=True" messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`" request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`" slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
alerting_metadata: dict = {}
if time_difference_float > self.alerting_threshold: if time_difference_float > self.alerting_threshold:
# add deployment latencies to alert # add deployment latencies to alert
if ( if (
@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
and "litellm_params" in kwargs and "litellm_params" in kwargs
and "metadata" in kwargs["litellm_params"] and "metadata" in kwargs["litellm_params"]
): ):
_metadata = kwargs["litellm_params"]["metadata"] _metadata: dict = kwargs["litellm_params"]["metadata"]
request_info = litellm.utils._add_key_name_and_team_to_alert( request_info = litellm.utils._add_key_name_and_team_to_alert(
request_info=request_info, metadata=_metadata request_info=request_info, metadata=_metadata
) )
@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
request_info += ( request_info += (
f"\nAvailable Deployment Latencies\n{_deployment_latency_map}" f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
) )
if "alerting_metadata" in _metadata:
alerting_metadata = _metadata["alerting_metadata"]
await self.send_alert( await self.send_alert(
message=slow_message + request_info, message=slow_message + request_info,
level="Low", level="Low",
alert_type="llm_too_slow", alert_type="llm_too_slow",
alerting_metadata=alerting_metadata,
) )
async def async_update_daily_reports( async def async_update_daily_reports(
@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s" message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"
# send alert # send alert
-await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+await self.send_alert(
+    message=message,
+    level="Low",
+    alert_type="daily_reports",
+    alerting_metadata={},
+)
return True return True
@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
await asyncio.sleep( await asyncio.sleep(
self.alerting_threshold self.alerting_threshold
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests ) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
alerting_metadata: dict = {}
if ( if (
request_data is not None request_data is not None
and request_data.get("litellm_status", "") != "success" and request_data.get("litellm_status", "") != "success"
@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
): ):
# In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data`` # In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data``
# in that case we fallback to the api base set in the request metadata # in that case we fallback to the api base set in the request metadata
_metadata = request_data["metadata"] _metadata: dict = request_data["metadata"]
_api_base = _metadata.get("api_base", "") _api_base = _metadata.get("api_base", "")
request_info = litellm.utils._add_key_name_and_team_to_alert( request_info = litellm.utils._add_key_name_and_team_to_alert(
@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):
if _api_base is None: if _api_base is None:
_api_base = "" _api_base = ""
if "alerting_metadata" in _metadata:
alerting_metadata = _metadata["alerting_metadata"]
request_info += f"\nAPI Base: `{_api_base}`" request_info += f"\nAPI Base: `{_api_base}`"
# only alert hanging responses if they have not been marked as success # only alert hanging responses if they have not been marked as success
alerting_message = ( alerting_message = (
@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
message=alerting_message + request_info, message=alerting_message + request_info,
level="Medium", level="Medium",
alert_type="llm_requests_hanging", alert_type="llm_requests_hanging",
alerting_metadata=alerting_metadata,
) )
async def failed_tracking_alert(self, error_message: str): async def failed_tracking_alert(self, error_message: str):
@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
result = await _cache.async_get_cache(key=_cache_key) result = await _cache.async_get_cache(key=_cache_key)
if result is None: if result is None:
await self.send_alert( await self.send_alert(
message=message, level="High", alert_type="budget_alerts" message=message,
level="High",
alert_type="budget_alerts",
alerting_metadata={},
) )
await _cache.async_set_cache( await _cache.async_set_cache(
key=_cache_key, key=_cache_key,
@ -680,7 +698,7 @@ class SlackAlerting(CustomLogger):
return return
if "budget_alerts" not in self.alert_types: if "budget_alerts" not in self.alert_types:
return return
_id: str = "default_id" # used for caching _id: Optional[str] = "default_id" # used for caching
user_info_json = user_info.model_dump(exclude_none=True) user_info_json = user_info.model_dump(exclude_none=True)
for k, v in user_info_json.items(): for k, v in user_info_json.items():
user_info_str = "\n{}: {}\n".format(k, v) user_info_str = "\n{}: {}\n".format(k, v)
@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
level="High", level="High",
alert_type="budget_alerts", alert_type="budget_alerts",
user_info=webhook_event, user_info=webhook_event,
alerting_metadata={},
) )
await _cache.async_set_cache( await _cache.async_set_cache(
key=_cache_key, key=_cache_key,
@ -769,7 +788,13 @@ class SlackAlerting(CustomLogger):
response_cost: Optional[float], response_cost: Optional[float],
max_budget: Optional[float], max_budget: Optional[float],
): ):
-if end_user_id is not None and token is not None and response_cost is not None:
+if (
+    self.alerting is not None
+    and "webhook" in self.alerting
+    and end_user_id is not None
+    and token is not None
+    and response_cost is not None
+):
# log customer spend # log customer spend
event = WebhookEvent( event = WebhookEvent(
spend=response_cost, spend=response_cost,
@ -941,7 +966,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="Medium", alert_type="outage_alerts" message=msg,
level="Medium",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["minor_alert_sent"] = True outage_value["minor_alert_sent"] = True
@ -963,7 +991,12 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert(message=msg, level="High", alert_type="outage_alerts") await self.send_alert(
message=msg,
level="High",
alert_type="outage_alerts",
alerting_metadata={},
)
# set to true # set to true
outage_value["major_alert_sent"] = True outage_value["major_alert_sent"] = True
@ -1062,7 +1095,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="Medium", alert_type="outage_alerts" message=msg,
level="Medium",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["minor_alert_sent"] = True outage_value["minor_alert_sent"] = True
@ -1081,7 +1117,10 @@ class SlackAlerting(CustomLogger):
) )
# send minor alert # send minor alert
await self.send_alert( await self.send_alert(
message=msg, level="High", alert_type="outage_alerts" message=msg,
level="High",
alert_type="outage_alerts",
alerting_metadata={},
) )
# set to true # set to true
outage_value["major_alert_sent"] = True outage_value["major_alert_sent"] = True
@ -1143,7 +1182,10 @@ Model Info:
""" """
alert_val = self.send_alert( alert_val = self.send_alert(
message=message, level="Low", alert_type="new_model_added" message=message,
level="Low",
alert_type="new_model_added",
alerting_metadata={},
) )
if alert_val is not None and asyncio.iscoroutine(alert_val): if alert_val is not None and asyncio.iscoroutine(alert_val):
@ -1159,6 +1201,9 @@ Model Info:
Currently only implemented for budget alerts Currently only implemented for budget alerts
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
Raises Exception
- if WEBHOOK_URL is not set
""" """
webhook_url = os.getenv("WEBHOOK_URL", None) webhook_url = os.getenv("WEBHOOK_URL", None)
@ -1297,7 +1342,9 @@ Model Info:
verbose_proxy_logger.error("Error sending email alert %s", str(e)) verbose_proxy_logger.error("Error sending email alert %s", str(e))
return False return False
-async def send_email_alert_using_smtp(self, webhook_event: WebhookEvent) -> bool:
+async def send_email_alert_using_smtp(
+    self, webhook_event: WebhookEvent, alert_type: str
+) -> bool:
""" """
Sends structured Email alert to an SMTP server Sends structured Email alert to an SMTP server
@ -1306,7 +1353,6 @@ Model Info:
Returns -> True if sent, False if not. Returns -> True if sent, False if not.
""" """
from litellm.proxy.utils import send_email from litellm.proxy.utils import send_email
from litellm.proxy.proxy_server import premium_user, prisma_client from litellm.proxy.proxy_server import premium_user, prisma_client
email_logo_url = os.getenv( email_logo_url = os.getenv(
@ -1360,6 +1406,10 @@ Model Info:
subject=email_event["subject"], subject=email_event["subject"],
html=email_event["html"], html=email_event["html"],
) )
if webhook_event.event_group == "team":
from litellm.integrations.email_alerting import send_team_budget_alert
await send_team_budget_alert(webhook_event=webhook_event)
return False return False
@ -1368,6 +1418,7 @@ Model Info:
message: str, message: str,
level: Literal["Low", "Medium", "High"], level: Literal["Low", "Medium", "High"],
alert_type: Literal[AlertType], alert_type: Literal[AlertType],
alerting_metadata: dict,
user_info: Optional[WebhookEvent] = None, user_info: Optional[WebhookEvent] = None,
**kwargs, **kwargs,
): ):
@ -1401,7 +1452,9 @@ Model Info:
and user_info is not None and user_info is not None
): ):
# only send budget alerts over Email # only send budget alerts over Email
-await self.send_email_alert_using_smtp(webhook_event=user_info)
+await self.send_email_alert_using_smtp(
+    webhook_event=user_info, alert_type=alert_type
+)
if "slack" not in self.alerting: if "slack" not in self.alerting:
return return
@ -1425,6 +1478,9 @@ Model Info:
if kwargs: if kwargs:
for key, value in kwargs.items(): for key, value in kwargs.items():
formatted_message += f"\n\n{key}: `{value}`\n\n" formatted_message += f"\n\n{key}: `{value}`\n\n"
if alerting_metadata:
for key, value in alerting_metadata.items():
formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
if _proxy_base_url is not None: if _proxy_base_url is not None:
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`" formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
@ -1440,7 +1496,7 @@ Model Info:
slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None) slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
if slack_webhook_url is None: if slack_webhook_url is None:
raise Exception("Missing SLACK_WEBHOOK_URL from environment") raise ValueError("Missing SLACK_WEBHOOK_URL from environment")
payload = {"text": formatted_message} payload = {"text": formatted_message}
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
@ -1453,7 +1509,7 @@ Model Info:
pass pass
else: else:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
"Error sending slack alert. Error=", response.text "Error sending slack alert. Error={}".format(response.text)
) )
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -1622,6 +1678,7 @@ Model Info:
message=_weekly_spend_message, message=_weekly_spend_message,
level="Low", level="Low",
alert_type="spend_reports", alert_type="spend_reports",
alerting_metadata={},
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.error("Error sending weekly spend report", e) verbose_proxy_logger.error("Error sending weekly spend report", e)
@ -1673,6 +1730,7 @@ Model Info:
message=_spend_message, message=_spend_message,
level="Low", level="Low",
alert_type="spend_reports", alert_type="spend_reports",
alerting_metadata={},
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.error("Error sending weekly spend report", e) verbose_proxy_logger.error("Error sending weekly spend report", e)

View file

@ -0,0 +1,41 @@
# What is this?
## Helper utilities for the model response objects
def map_finish_reason(
finish_reason: str,
): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
# anthropic mapping
if finish_reason == "stop_sequence":
return "stop"
# cohere mapping - https://docs.cohere.com/reference/generate
elif finish_reason == "COMPLETE":
return "stop"
elif finish_reason == "MAX_TOKENS": # cohere + vertex ai
return "length"
elif finish_reason == "ERROR_TOXIC":
return "content_filter"
elif (
finish_reason == "ERROR"
): # openai currently doesn't support an 'error' finish reason
return "stop"
# huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
return "stop"
elif (
finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
return "stop"
elif finish_reason == "SAFETY": # vertex ai
return "content_filter"
elif finish_reason == "STOP": # vertex ai
return "stop"
elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic
return "stop"
elif finish_reason == "max_tokens": # anthropic
return "length"
elif finish_reason == "tool_use": # anthropic
return "tool_calls"
elif finish_reason == "content_filtered":
return "content_filter"
return finish_reason
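
As a quick illustration, the new helper normalizes provider-specific finish reasons onto OpenAI's vocabulary ("stop", "length", "content_filter", "tool_calls"), passing unknown values through unchanged. A small usage sketch, assuming the module path used by the import updates elsewhere in this diff:

```python
from litellm.litellm_core_utils.core_helpers import map_finish_reason

print(map_finish_reason("MAX_TOKENS"))    # -> "length"         (Cohere / Vertex AI)
print(map_finish_reason("end_turn"))      # -> "stop"           (Anthropic)
print(map_finish_reason("tool_use"))      # -> "tool_calls"     (Anthropic)
print(map_finish_reason("SAFETY"))        # -> "content_filter" (Vertex AI)
print(map_finish_reason("weird_reason"))  # -> "weird_reason"   (passed through)
```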

File diff suppressed because it is too large

View file

@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any
import litellm import litellm
if TYPE_CHECKING: if TYPE_CHECKING:
-    from litellm.utils import Logging as _LiteLLMLoggingObject
+    from litellm.litellm_core_utils.litellm_logging import (
+        Logging as _LiteLLMLoggingObject,
+    )
LiteLLMLoggingObject = _LiteLLMLoggingObject LiteLLMLoggingObject = _LiteLLMLoggingObject
else: else:
@ -30,7 +32,6 @@ def redact_message_input_output_from_logging(
if litellm.turn_off_message_logging is not True: if litellm.turn_off_message_logging is not True:
return result return result
_result = copy.deepcopy(result)
# remove messages, prompts, input, response from logging # remove messages, prompts, input, response from logging
litellm_logging_obj.model_call_details["messages"] = [ litellm_logging_obj.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"} {"role": "user", "content": "redacted-by-litellm"}
@ -53,8 +54,10 @@ def redact_message_input_output_from_logging(
elif isinstance(choice, litellm.utils.StreamingChoices): elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm" choice.delta.content = "redacted-by-litellm"
else: else:
-if _result is not None:
-    if isinstance(_result, litellm.ModelResponse):
+if result is not None:
+    if isinstance(result, litellm.ModelResponse):
+        # only deep copy litellm.ModelResponse
+        _result = copy.deepcopy(result)
if hasattr(_result, "choices") and _result.choices is not None: if hasattr(_result, "choices") and _result.choices is not None:
for choice in _result.choices: for choice in _result.choices:
if isinstance(choice, litellm.Choices): if isinstance(choice, litellm.Choices):
@ -63,3 +66,6 @@ def redact_message_input_output_from_logging(
choice.delta.content = "redacted-by-litellm" choice.delta.content = "redacted-by-litellm"
return _result return _result
# by default return result
return result
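
The behavioral change above is that only `litellm.ModelResponse` results are deep-copied before redaction; any other result type is now returned as-is instead of being copied up front. Redaction remains gated on the same module-level flag, roughly as in this hedged sketch:

```python
import litellm

# Redaction is opt-in: when this flag is True, logging callbacks see
# "redacted-by-litellm" in place of prompts and completions, while the caller
# still receives the unredacted response object.
litellm.turn_off_message_logging = True

response = litellm.completion(
    model="gpt-3.5-turbo",                         # any configured model
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",                         # avoids a real API call
)
print(response.choices[0].message.content)         # -> "hello"
```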

View file

@ -5,10 +5,16 @@ import requests, copy # type: ignore
import time import time
from functools import partial from functools import partial
from typing import Callable, Optional, List, Union from typing import Callable, Optional, List, Union
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+import litellm.litellm_core_utils
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM from .base import BaseLLM
import httpx # type: ignore import httpx # type: ignore
from litellm.types.llms.anthropic import AnthropicMessagesToolChoice from litellm.types.llms.anthropic import AnthropicMessagesToolChoice
@ -171,7 +177,7 @@ async def make_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = AsyncHTTPHandler() # Create a new client if none provided client = _get_async_httpx_client() # Create a new client if none provided
response = await client.post(api_base, headers=headers, data=data, stream=True) response = await client.post(api_base, headers=headers, data=data, stream=True)
@ -201,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],
@ -316,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],
@ -463,9 +469,7 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None, logger_fn=None,
headers={}, headers={},
) -> Union[ModelResponse, CustomStreamWrapper]: ) -> Union[ModelResponse, CustomStreamWrapper]:
-async_handler = AsyncHTTPHandler(
-    timeout=httpx.Timeout(timeout=600.0, connect=5.0)
-)
+async_handler = _get_async_httpx_client()
response = await async_handler.post(api_base, headers=headers, json=data) response = await async_handler.post(api_base, headers=headers, json=data)
if stream and _is_function_call: if stream and _is_function_call:
return self.process_streaming_response( return self.process_streaming_response(

View file

@ -2,7 +2,7 @@
import litellm import litellm
import httpx, requests import httpx, requests
from typing import Optional, Union from typing import Optional, Union
from litellm.utils import Logging from litellm.litellm_core_utils.litellm_logging import Logging
class BaseLLM: class BaseLLM:

View file

@ -5,12 +5,10 @@ import time, uuid
from typing import Callable, Optional, Any, Union, List from typing import Callable, Optional, Any, Union, List
import litellm import litellm
from litellm.utils import ( from litellm.utils import (
ModelResponse,
get_secret, get_secret,
Usage,
ImageResponse,
map_finish_reason,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.types.utils import ImageResponse, ModelResponse, Usage
from .prompt_templates.factory import ( from .prompt_templates.factory import (
prompt_factory, prompt_factory,
custom_prompt, custom_prompt,
@ -633,7 +631,11 @@ def init_bedrock_client(
config = boto3.session.Config() config = boto3.session.Config()
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
oidc_token = get_secret(aws_web_identity_token) oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None: if oidc_token is None:
@ -642,9 +644,7 @@ def init_bedrock_client(
status_code=401, status_code=401,
) )
-sts_client = boto3.client(
-    "sts"
-)
+sts_client = boto3.client("sts")
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html

View file

@ -22,13 +22,12 @@ from typing import (
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
Message,
Choices,
get_secret, get_secret,
Logging,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.utils import Message, Choices
import litellm, uuid import litellm, uuid
from .prompt_templates.factory import ( from .prompt_templates.factory import (
prompt_factory, prompt_factory,
@ -41,7 +40,12 @@ from .prompt_templates.factory import (
_bedrock_converse_messages_pt, _bedrock_converse_messages_pt,
_bedrock_tools_pt, _bedrock_tools_pt,
) )
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM from .base import BaseLLM
import httpx # type: ignore import httpx # type: ignore
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
@ -57,6 +61,7 @@ from litellm.caching import DualCache
iam_cache = DualCache() iam_cache = DualCache()
class AmazonCohereChatConfig: class AmazonCohereChatConfig:
""" """
Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
@ -167,7 +172,7 @@ async def make_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = AsyncHTTPHandler() # Create a new client if none provided client = _get_async_httpx_client() # Create a new client if none provided
response = await client.post(api_base, headers=headers, data=data, stream=True) response = await client.post(api_base, headers=headers, data=data, stream=True)
@ -198,7 +203,7 @@ def make_sync_call(
logging_obj, logging_obj,
): ):
if client is None: if client is None:
client = HTTPHandler() # Create a new client if none provided client = _get_httpx_client() # Create a new client if none provided
response = client.post(api_base, headers=headers, data=data, stream=True) response = client.post(api_base, headers=headers, data=data, stream=True)
@ -327,13 +332,19 @@ class BedrockLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-    iam_creds_cache_key = json.dumps({
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
+    iam_creds_cache_key = json.dumps(
+        {
            "aws_web_identity_token": aws_web_identity_token,
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
-    })
+        }
+    )
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None: if iam_creds_dict is None:
@ -348,7 +359,7 @@ class BedrockLLM(BaseLLM):
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
region_name=aws_region_name, region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
) )
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@ -362,12 +373,18 @@ class BedrockLLM(BaseLLM):
iam_creds_dict = { iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"][
"SecretAccessKey"
],
"aws_session_token": sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name, "region_name": aws_region_name,
} }
-iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+iam_cache.set_cache(
+    key=iam_creds_cache_key,
+    value=json.dumps(iam_creds_dict),
+    ttl=3600 - 60,
+)
session = boto3.Session(**iam_creds_dict) session = boto3.Session(**iam_creds_dict)
@ -976,7 +993,7 @@ class BedrockLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
self.client = HTTPHandler(**_params) # type: ignore self.client = _get_httpx_client(_params) # type: ignore
else: else:
self.client = client self.client = client
if (stream is not None and stream == True) and provider != "ai21": if (stream is not None and stream == True) and provider != "ai21":
@ -1058,7 +1075,7 @@ class BedrockLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore client = _get_async_httpx_client(_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
@ -1433,13 +1450,19 @@ class BedrockConverseLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
-if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-    iam_creds_cache_key = json.dumps({
+if (
+    aws_web_identity_token is not None
+    and aws_role_name is not None
+    and aws_session_name is not None
+):
+    iam_creds_cache_key = json.dumps(
+        {
            "aws_web_identity_token": aws_web_identity_token,
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
-    })
+        }
+    )
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None: if iam_creds_dict is None:
@ -1454,7 +1477,7 @@ class BedrockConverseLLM(BaseLLM):
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
region_name=aws_region_name, region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
) )
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@ -1468,12 +1491,18 @@ class BedrockConverseLLM(BaseLLM):
iam_creds_dict = { iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"][
"SecretAccessKey"
],
"aws_session_token": sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name, "region_name": aws_region_name,
} }
-iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+iam_cache.set_cache(
+    key=iam_creds_cache_key,
+    value=json.dumps(iam_creds_dict),
+    ttl=3600 - 60,
+)
session = boto3.Session(**iam_creds_dict) session = boto3.Session(**iam_creds_dict)
@ -1575,7 +1604,7 @@ class BedrockConverseLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore client = _get_async_httpx_client(_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
@ -1847,7 +1876,7 @@ class BedrockConverseLLM(BaseLLM):
if isinstance(timeout, float) or isinstance(timeout, int): if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout) timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout _params["timeout"] = timeout
client = HTTPHandler(**_params) # type: ignore client = _get_httpx_client(_params) # type: ignore
else: else:
client = client client = client
try: try:
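
Most of the hunks above are reformatting, but the pattern they wrap is worth spelling out: assumed-role credentials from the web-identity (OIDC) flow are cached in `iam_cache` (a `DualCache`) under a key derived from the STS parameters, with a TTL just under the one-hour session lifetime. A simplified sketch of that pattern; any name not shown in the diff is illustrative:

```python
import json
import boto3
from litellm.caching import DualCache

iam_cache = DualCache()

def get_web_identity_credentials(
    oidc_token: str,        # already-resolved web identity token (illustrative)
    aws_role_name: str,     # role ARN to assume
    aws_session_name: str,
    aws_region_name: str,
) -> dict:
    cache_key = json.dumps(
        {
            "aws_role_name": aws_role_name,
            "aws_session_name": aws_session_name,
            "aws_region_name": aws_region_name,
        }
    )
    cached = iam_cache.get_cache(cache_key)
    if cached is not None:
        return json.loads(cached)

    sts_client = boto3.client(
        "sts",
        region_name=aws_region_name,
        endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
    )
    sts_response = sts_client.assume_role_with_web_identity(
        RoleArn=aws_role_name,
        RoleSessionName=aws_session_name,
        WebIdentityToken=oidc_token,
    )
    creds = {
        "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
        "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
        "aws_session_token": sts_response["Credentials"]["SessionToken"],
        "region_name": aws_region_name,
    }
    # Cache slightly under the 1h STS session so entries never outlive the
    # credentials they hold (ttl=3600 - 60, as in the diff).
    iam_cache.set_cache(key=cache_key, value=json.dumps(creds), ttl=3600 - 60)
    return creds
```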

View file

@ -219,3 +219,60 @@ class HTTPHandler:
self.close() self.close()
except Exception: except Exception:
pass pass
def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
"""
Retrieves the async HTTP client from the cache
If not present, creates a new client
Caches the new client and returns it.
"""
_params_key_name = ""
if params is not None:
for key, value in params.items():
try:
_params_key_name += f"{key}_{value}"
except Exception:
pass
_cache_key_name = "async_httpx_client" + _params_key_name
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = AsyncHTTPHandler(**params)
else:
_new_client = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client
def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
"""
Retrieves the HTTP client from the cache
If not present, creates a new client
Caches the new client and returns it.
"""
_params_key_name = ""
if params is not None:
for key, value in params.items():
try:
_params_key_name += f"{key}_{value}"
except Exception:
pass
_cache_key_name = "httpx_client" + _params_key_name
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = HTTPHandler(**params)
else:
_new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client
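
The point of these helpers is that handler instances are memoized in `litellm.in_memory_llm_clients_cache`, keyed by their construction params, so hot paths reuse one pooled httpx client instead of building a new handler per request (this is what the Anthropic and Bedrock call sites above switch to). A small usage sketch:

```python
import httpx
from litellm.llms.custom_httpx.http_handler import (
    _get_async_httpx_client,
    _get_httpx_client,
)

# Same params -> same cached handler (and therefore the same connection pool).
client_a = _get_httpx_client()
client_b = _get_httpx_client()
assert client_a is client_b

# Different params produce, and cache, a separately keyed handler.
default_async = _get_async_httpx_client()
short_timeout_async = _get_async_httpx_client(
    params={"timeout": httpx.Timeout(timeout=30.0, connect=5.0)}
)
assert default_async is not short_timeout_async
```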

View file

@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
EmbeddingResponse, EmbeddingResponse,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union
from litellm.utils import ( from litellm.utils import (
ModelResponse, ModelResponse,
Usage, Usage,
map_finish_reason,
CustomStreamWrapper, CustomStreamWrapper,
Message, Message,
Choices, Choices,
) )
from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response], response: Union[requests.Response, httpx.Response],
model_response: ModelResponse, model_response: ModelResponse,
stream: bool, stream: bool,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -4,7 +4,6 @@ from enum import Enum
import requests, copy # type: ignore import requests, copy # type: ignore
import time import time
from typing import Callable, Optional, List from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

View file

@ -5,7 +5,8 @@ import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Literal, Any from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel from pydantic import BaseModel
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
from litellm.types.llms.vertex_ai import * from litellm.types.llms.vertex_ai import *

View file

@ -6,7 +6,8 @@ from enum import Enum
import requests, copy # type: ignore import requests, copy # type: ignore
import time, uuid import time, uuid
from typing import Callable, Optional, List from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .prompt_templates.factory import ( from .prompt_templates.factory import (

View file

@ -8,7 +8,10 @@ from enum import Enum
import requests # type: ignore import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Any, Tuple from typing import Callable, Optional, Union, List, Any, Tuple
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@ -320,7 +323,7 @@ class VertexLLM(BaseLLM):
model: str, model: str,
response: httpx.Response, response: httpx.Response,
model_response: ModelResponse, model_response: ModelResponse,
logging_obj: litellm.utils.Logging, logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict, optional_params: dict,
api_key: str, api_key: str,
data: Union[dict, str], data: Union[dict, str],

View file

@ -368,7 +368,9 @@ async def acompletion(
return response return response
except Exception as e: except Exception as e:
verbose_logger.error( verbose_logger.error(
"litellm.acompletion(): Exception occured - {}".format(str(e)) "litellm.acompletion(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
) )
verbose_logger.debug(traceback.format_exc()) verbose_logger.debug(traceback.format_exc())
custom_llm_provider = custom_llm_provider or "openai" custom_llm_provider = custom_llm_provider or "openai"
@ -399,6 +401,7 @@ def mock_completion(
stream: Optional[bool] = False, stream: Optional[bool] = False,
mock_response: Union[str, Exception] = "This is a mock request", mock_response: Union[str, Exception] = "This is a mock request",
logging=None, logging=None,
custom_llm_provider=None,
**kwargs, **kwargs,
): ):
""" """
@ -436,7 +439,7 @@ def mock_completion(
raise litellm.APIError( raise litellm.APIError(
status_code=getattr(mock_response, "status_code", 500), # type: ignore status_code=getattr(mock_response, "status_code", 500), # type: ignore
message=getattr(mock_response, "text", str(mock_response)), message=getattr(mock_response, "text", str(mock_response)),
llm_provider=getattr(mock_response, "llm_provider", "openai"), # type: ignore llm_provider=getattr(mock_response, "llm_provider", custom_llm_provider or "openai"), # type: ignore
model=model, # type: ignore model=model, # type: ignore
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"), request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
) )
@ -905,6 +908,7 @@ def completion(
logging=logging, logging=logging,
acompletion=acompletion, acompletion=acompletion,
mock_delay=kwargs.get("mock_delay", None), mock_delay=kwargs.get("mock_delay", None),
custom_llm_provider=custom_llm_provider,
) )
if custom_llm_provider == "azure": if custom_llm_provider == "azure":
# azure configs # azure configs
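
One functional change in this file: `mock_completion` now receives the resolved `custom_llm_provider`, so mocked error responses report the caller's actual provider instead of always defaulting to `openai`. A hedged example of exercising the mock path through the public API (the model name is illustrative):

```python
import litellm

# Mocked success: no network call is made, the canned text is returned.
resp = litellm.completion(
    model="anthropic/claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
    mock_response="pong",
)
print(resp.choices[0].message.content)  # -> "pong"

# Mocked failure: passing an Exception surfaces as a litellm.APIError whose
# llm_provider now reflects the resolved provider ("anthropic"), not "openai".
try:
    litellm.completion(
        model="anthropic/claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "ping"}],
        mock_response=Exception("simulated outage"),
    )
except Exception as err:
    print(getattr(err, "llm_provider", None))
```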

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(64090);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

View file

@ -1 +0,0 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(3827);n(64090);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(57437),a=l(2265),r=l(47907),n=l(2179),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,505,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/63f65dbb14efd996.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/b65d5698d1a1958d.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-626098dc8320c801.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S9_6IC27HNWjJtr-LNaAO\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, 
initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""] 3:I[68101,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-626098dc8320c801.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""] 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -79,8 +79,8 @@ litellm_settings:
failure_callback: ["langfuse"] failure_callback: ["langfuse"]
cache: true cache: true
# general_settings: general_settings:
# alerting: ["email"] alerting: ["slack"]
# key_management_system: "aws_kms" # key_management_system: "aws_kms"
# key_management_settings: # key_management_settings:
# hosted_keys: ["LITELLM_MASTER_KEY"] # hosted_keys: ["LITELLM_MASTER_KEY"]
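Once the proxy loads this config, the un-commented block above is parsed into its `general_settings` dict. A small illustrative sketch of the resulting shape (nothing beyond the hunk above is implied; the key-management lines stay commented out):

```python
# Illustrative only: the dict shape the YAML hunk above produces once loaded.
general_settings = {
    "alerting": ["slack"],  # switched from the commented-out ["email"] example
    # "key_management_system": "aws_kms",
    # "key_management_settings": {"hosted_keys": ["LITELLM_MASTER_KEY"]},
}

assert "slack" in general_settings.get("alerting", [])
```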

View file

@ -1358,10 +1358,11 @@ class CallInfo(LiteLLMBase):
spend: float spend: float
max_budget: Optional[float] = None max_budget: Optional[float] = None
token: str = Field(description="Hashed value of that key") token: Optional[str] = Field(default=None, description="Hashed value of that key")
customer_id: Optional[str] = None customer_id: Optional[str] = None
user_id: Optional[str] = None user_id: Optional[str] = None
team_id: Optional[str] = None team_id: Optional[str] = None
team_alias: Optional[str] = None
user_email: Optional[str] = None user_email: Optional[str] = None
key_alias: Optional[str] = None key_alias: Optional[str] = None
projected_exceeded_date: Optional[str] = None projected_exceeded_date: Optional[str] = None
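Since `token` is now optional and `team_alias` has been added, a `CallInfo` can be built without a hashed key. A minimal sketch (assuming `litellm` is installed and `CallInfo` is imported from `litellm.proxy._types`, as in the hunk above; all field values are illustrative):

```python
# Sketch only - field values are made up; CallInfo comes from the hunk above.
from litellm.proxy._types import CallInfo

info = CallInfo(
    spend=12.5,
    max_budget=100.0,
    user_id="user-123",
    team_id="team-abc",
    team_alias="research-team",  # new field added in this change
)

print(info.token)       # None - no longer a required field
print(info.team_alias)  # research-team
```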
@ -1574,3 +1575,44 @@ class ManagementEndpointLoggingPayload(LiteLLMBase):
exception: Optional[Any] = None exception: Optional[Any] = None
start_time: Optional[datetime] = None start_time: Optional[datetime] = None
end_time: Optional[datetime] = None end_time: Optional[datetime] = None
class ProxyException(Exception):
# NOTE: DO NOT MODIFY THIS
# This is used to map exactly to OPENAI Exceptions
def __init__(
self,
message: str,
type: str,
param: Optional[str],
code: Optional[int],
):
self.message = message
self.type = type
self.param = param
self.code = code
# rules for proxyExceptions
# Litellm router.py returns "No healthy deployment available" when there are no deployments available
# Should map to 429 errors https://github.com/BerriAI/litellm/issues/2487
if (
"No healthy deployment available" in self.message
or "No deployments available" in self.message
):
self.code = 429
def to_dict(self) -> dict:
"""Converts the ProxyException instance to a dictionary."""
return {
"message": self.message,
"type": self.type,
"param": self.param,
"code": self.code,
}
class CommonProxyErrors(enum.Enum):
db_not_connected_error = "DB not connected"
no_llm_router = "No models configured on proxy"
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
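A short sketch of the new exception behaviour (assuming `ProxyException` is importable from `litellm.proxy._types`, where this hunk adds it; the message text below is illustrative): router "no deployments" errors are remapped to HTTP 429, and `to_dict()` yields an OpenAI-style error payload.

```python
# Sketch only - illustrative message; ProxyException is added to
# litellm.proxy._types by the hunk above.
from litellm.proxy._types import ProxyException

exc = ProxyException(
    message="No deployments available for selected model",
    type="router_error",
    param=None,
    code=500,
)

# The constructor remaps "No deployments available" / "No healthy deployment
# available" messages to 429 (see https://github.com/BerriAI/litellm/issues/2487).
assert exc.code == 429
print(exc.to_dict())
# -> {'message': 'No deployments available for selected model',
#     'type': 'router_error', 'param': None, 'code': 429}
```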

File diff suppressed because it is too large

View file

@ -0,0 +1,194 @@
from typing import Optional
from fastapi import Depends, Request, APIRouter
from fastapi import HTTPException
import copy
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
router = APIRouter(
prefix="/cache",
tags=["caching"],
)
@router.get(
"/ping",
dependencies=[Depends(user_api_key_auth)],
)
async def cache_ping():
"""
Endpoint for checking if cache can be pinged
"""
try:
litellm_cache_params = {}
specific_cache_params = {}
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
for k, v in vars(litellm.cache).items():
try:
if k == "cache":
continue
litellm_cache_params[k] = str(copy.deepcopy(v))
except Exception:
litellm_cache_params[k] = "<unable to copy or convert>"
for k, v in vars(litellm.cache.cache).items():
try:
specific_cache_params[k] = str(v)
except Exception:
specific_cache_params[k] = "<unable to copy or convert>"
if litellm.cache.type == "redis":
# ping the redis cache
ping_response = await litellm.cache.ping()
verbose_proxy_logger.debug(
"/cache/ping: ping_response: " + str(ping_response)
)
# making a set cache call
# add cache does not return anything
await litellm.cache.async_add_cache(
result="test_key",
model="test-model",
messages=[{"role": "user", "content": "test from litellm"}],
)
verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
return {
"status": "healthy",
"cache_type": litellm.cache.type,
"ping_response": True,
"set_cache_response": "success",
"litellm_cache_params": litellm_cache_params,
"redis_cache_params": specific_cache_params,
}
else:
return {
"status": "healthy",
"cache_type": litellm.cache.type,
"litellm_cache_params": litellm_cache_params,
}
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)}).Cache parameters: {litellm_cache_params}.specific_cache_params: {specific_cache_params}",
)
@router.post(
"/delete",
tags=["caching"],
dependencies=[Depends(user_api_key_auth)],
)
async def cache_delete(request: Request):
"""
Endpoint for deleting a key from the cache. All responses from litellm proxy have `x-litellm-cache-key` in the headers
Parameters:
- **keys**: *Optional[List[str]]* - A list of keys to delete from the cache. Example {"keys": ["key1", "key2"]}
```shell
curl -X POST "http://0.0.0.0:4000/cache/delete" \
-H "Authorization: Bearer sk-1234" \
-d '{"keys": ["key1", "key2"]}'
```
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
request_data = await request.json()
keys = request_data.get("keys", None)
if litellm.cache.type == "redis":
await litellm.cache.delete_cache_keys(keys=keys)
return {
"status": "success",
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support deleting a key. only `redis` is supported",
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Cache Delete Failed({str(e)})",
)
@router.get(
"/redis/info",
dependencies=[Depends(user_api_key_auth)],
)
async def cache_redis_info():
"""
Endpoint for getting /redis/info
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
if litellm.cache.type == "redis":
client_list = litellm.cache.cache.client_list()
redis_info = litellm.cache.cache.info()
num_clients = len(client_list)
return {
"num_clients": num_clients,
"clients": client_list,
"info": redis_info,
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support flushing",
)
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)})",
)
@router.post(
"/flushall",
tags=["caching"],
dependencies=[Depends(user_api_key_auth)],
)
async def cache_flushall():
"""
Flush all items from the cache (every cached item will be deleted).
Raises HTTPException if the cache is not initialized or if the cache type does not support flushing.
Returns a dictionary with the status of the operation.
Usage:
```
curl -X POST http://0.0.0.0:4000/cache/flushall -H "Authorization: Bearer sk-1234"
```
"""
try:
if litellm.cache is None:
raise HTTPException(
status_code=503, detail="Cache not initialized. litellm.cache is None"
)
if litellm.cache.type == "redis":
litellm.cache.cache.flushall()
return {
"status": "success",
}
else:
raise HTTPException(
status_code=500,
detail=f"Cache type {litellm.cache.type} does not support flushing",
)
except Exception as e:
raise HTTPException(
status_code=503,
detail=f"Service Unhealthy ({str(e)})",
)
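For reference, a minimal client-side sketch that exercises the cache endpoints defined above (assumptions: a LiteLLM proxy running at `http://0.0.0.0:4000` with master key `sk-1234`, a Redis cache configured, and the third-party `requests` package installed; the cache key names are placeholders):

```python
# Illustrative client calls only - base URL, key, and cache keys are assumptions.
import requests

BASE = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Health-check the configured cache (works for redis and non-redis caches).
print(requests.get(f"{BASE}/cache/ping", headers=HEADERS).json())

# Delete specific cache keys (redis only, per the handler above).
resp = requests.post(
    f"{BASE}/cache/delete",
    headers=HEADERS,
    json={"keys": ["key1", "key2"]},  # placeholder keys
)
print(resp.json())

# Inspect redis client/server info, then flush everything (redis only).
print(requests.get(f"{BASE}/cache/redis/info", headers=HEADERS).json())
print(requests.post(f"{BASE}/cache/flushall", headers=HEADERS).json())
```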

View file

@ -1,91 +0,0 @@
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from fastapi import Request
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper
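The helper removed above is a generic "time the call, then log success or failure" decorator for management endpoints. The self-contained sketch below shows the same wrap-and-log pattern in miniature; every name in it is local to the example and none of it is LiteLLM code:

```python
# Self-contained illustration of the wrap-and-log decorator pattern used by the
# removed management_endpoint_wrapper; all names here are local to this sketch.
import asyncio
from datetime import datetime
from functools import wraps


def log_endpoint(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()
        try:
            result = await func(*args, **kwargs)
            print(f"{func.__name__} succeeded in {datetime.now() - start_time}")
            return result
        except Exception as e:
            print(f"{func.__name__} failed after {datetime.now() - start_time}: {e}")
            raise

    return wrapper


@log_endpoint
async def update_team(team_id: str) -> dict:
    # stand-in for a management endpoint body
    return {"team_id": team_id, "status": "updated"}


print(asyncio.run(update_team("team-abc")))
```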

View file

@ -0,0 +1,478 @@
from typing import Optional, Literal
import litellm
import os
import asyncio
import fastapi
import traceback
from datetime import datetime, timedelta
from fastapi import Depends, Request, APIRouter, Header, status
from litellm.proxy.health_check import perform_health_check
from fastapi import HTTPException
import copy
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
UserAPIKeyAuth,
ProxyException,
WebhookEvent,
CallInfo,
)
#### Health ENDPOINTS ####
router = APIRouter()
@router.get(
"/test",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def test_endpoint(request: Request):
"""
[DEPRECATED] use `/health/liveliness` instead.
A test endpoint that pings the proxy server to check if it's healthy.
Parameters:
request (Request): The incoming request.
Returns:
dict: A dictionary containing the route of the request URL.
"""
# ping the proxy server to check if its healthy
return {"route": request.url.path}
@router.get(
"/health/services",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def health_services_endpoint(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
service: Literal[
"slack_budget_alerts", "langfuse", "slack", "openmeter", "webhook", "email"
] = fastapi.Query(description="Specify the service being hit."),
):
"""
Hidden endpoint.
Used by the UI to let the user check whether Slack alerting is working as expected.
"""
try:
from litellm.proxy.proxy_server import (
proxy_logging_obj,
prisma_client,
general_settings,
)
if service is None:
raise HTTPException(
status_code=400, detail={"error": "Service must be specified."}
)
if service not in [
"slack_budget_alerts",
"email",
"langfuse",
"slack",
"openmeter",
"webhook",
]:
raise HTTPException(
status_code=400,
detail={
"error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
},
)
if service == "openmeter":
_ = await litellm.acompletion(
model="openai/litellm-mock-response-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
user="litellm:/health/services",
mock_response="This is a mock response",
)
return {
"status": "success",
"message": "Mock LLM request made - check openmeter.",
}
if service == "langfuse":
from litellm.integrations.langfuse import LangFuseLogger
langfuse_logger = LangFuseLogger()
langfuse_logger.Langfuse.auth_check()
_ = litellm.completion(
model="openai/litellm-mock-response-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
user="litellm:/health/services",
mock_response="This is a mock response",
)
return {
"status": "success",
"message": "Mock LLM request made - check langfuse.",
}
if service == "webhook":
user_info = CallInfo(
token=user_api_key_dict.token or "",
spend=1,
max_budget=0,
user_id=user_api_key_dict.user_id,
key_alias=user_api_key_dict.key_alias,
team_id=user_api_key_dict.team_id,
)
await proxy_logging_obj.budget_alerts(
type="user_budget",
user_info=user_info,
)
if service == "slack" or service == "slack_budget_alerts":
if "slack" in general_settings.get("alerting", []):
# test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
# check if user has opted into unique_alert_webhooks
if (
proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url
is not None
):
for (
alert_type
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
"""
"llm_exceptions",
"llm_too_slow",
"llm_requests_hanging",
"budget_alerts",
"db_exceptions",
"""
# only test alert if it's in active alert types
if (
proxy_logging_obj.slack_alerting_instance.alert_types
is not None
and alert_type
not in proxy_logging_obj.slack_alerting_instance.alert_types
):
continue
test_message = "default test message"
if alert_type == "llm_exceptions":
test_message = f"LLM Exception test alert"
elif alert_type == "llm_too_slow":
test_message = f"LLM Too Slow test alert"
elif alert_type == "llm_requests_hanging":
test_message = f"LLM Requests Hanging test alert"
elif alert_type == "budget_alerts":
test_message = f"Budget Alert test alert"
elif alert_type == "db_exceptions":
test_message = f"DB Exception test alert"
elif alert_type == "outage_alerts":
test_message = f"Outage Alert Exception test alert"
elif alert_type == "daily_reports":
test_message = f"Daily Reports test alert"
await proxy_logging_obj.alerting_handler(
message=test_message, level="Low", alert_type=alert_type
)
else:
await proxy_logging_obj.alerting_handler(
message="This is a test slack alert message",
level="Low",
alert_type="budget_alerts",
)
if prisma_client is not None:
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_monthly_spend_report()
)
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_weekly_spend_report()
)
alert_types = (
proxy_logging_obj.slack_alerting_instance.alert_types or []
)
alert_types = list(alert_types)
return {
"status": "success",
"alert_types": alert_types,
"message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
}
else:
raise HTTPException(
status_code=422,
detail={
"error": '"{}" not in proxy config: general_settings. Unable to test this.'.format(
service
)
},
)
if service == "email":
webhook_event = WebhookEvent(
event="key_created",
event_group="key",
event_message="Test Email Alert",
token=user_api_key_dict.token or "",
key_alias="Email Test key (This is only a test alert key. DO NOT USE THIS IN PRODUCTION.)",
spend=0,
max_budget=0,
user_id=user_api_key_dict.user_id,
user_email=os.getenv("TEST_EMAIL_ADDRESS"),
team_id=user_api_key_dict.team_id,
)
# use create task - this can take 10 seconds. don't keep ui users waiting for notification to check their email
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
webhook_event=webhook_event
)
)
return {
"status": "success",
"message": "Mock Email Alert sent, verify Email Alert Received",
}
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
@router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
async def health_endpoint(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
model: Optional[str] = fastapi.Query(
None, description="Specify the model name (optional)"
),
):
"""
🚨 USE `/health/liveliness` to health check the proxy 🚨
See more 👉 https://docs.litellm.ai/docs/proxy/health
Check the health of all the endpoints in config.yaml
To run health checks in the background, add this to config.yaml:
```
general_settings:
# ... other settings
background_health_checks: True
```
else, the health checks will be run on models when /health is called.
"""
from litellm.proxy.proxy_server import (
health_check_results,
use_background_health_checks,
user_model,
llm_model_list,
)
try:
if llm_model_list is None:
# if no router set, check if user set a model using litellm --model ollama/llama2
if user_model is not None:
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
model_list=[], cli_model=user_model
)
return {
"healthy_endpoints": healthy_endpoints,
"unhealthy_endpoints": unhealthy_endpoints,
"healthy_count": len(healthy_endpoints),
"unhealthy_count": len(unhealthy_endpoints),
}
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={"error": "Model list not initialized"},
)
_llm_model_list = copy.deepcopy(llm_model_list)
### FILTER MODELS FOR ONLY THOSE USER HAS ACCESS TO ###
if len(user_api_key_dict.models) > 0:
allowed_model_names = user_api_key_dict.models
else:
allowed_model_names = [] #
if use_background_health_checks:
return health_check_results
else:
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
_llm_model_list, model
)
return {
"healthy_endpoints": healthy_endpoints,
"unhealthy_endpoints": unhealthy_endpoints,
"healthy_count": len(healthy_endpoints),
"unhealthy_count": len(unhealthy_endpoints),
}
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
db_health_cache = {"status": "unknown", "last_updated": datetime.now()}
def _db_health_readiness_check():
from litellm.proxy.proxy_server import prisma_client
global db_health_cache
# Note - Intentionally don't try/except this so it raises an exception when it fails
# if timedelta is less than 2 minutes return DB Status
time_diff = datetime.now() - db_health_cache["last_updated"]
if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2):
return db_health_cache
prisma_client.health_check()
db_health_cache = {"status": "connected", "last_updated": datetime.now()}
return db_health_cache
@router.get(
"/active/callbacks",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def active_callbacks():
"""
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
"""
from litellm.proxy.proxy_server import proxy_logging_obj, general_settings
_alerting = str(general_settings.get("alerting"))
# get success callbacks
litellm_callbacks = [str(x) for x in litellm.callbacks]
litellm_input_callbacks = [str(x) for x in litellm.input_callback]
litellm_failure_callbacks = [str(x) for x in litellm.failure_callback]
litellm_success_callbacks = [str(x) for x in litellm.success_callback]
litellm_async_success_callbacks = [str(x) for x in litellm._async_success_callback]
litellm_async_failure_callbacks = [str(x) for x in litellm._async_failure_callback]
litellm_async_input_callbacks = [str(x) for x in litellm._async_input_callback]
all_litellm_callbacks = (
litellm_callbacks
+ litellm_input_callbacks
+ litellm_failure_callbacks
+ litellm_success_callbacks
+ litellm_async_success_callbacks
+ litellm_async_failure_callbacks
+ litellm_async_input_callbacks
)
alerting = proxy_logging_obj.alerting
_num_alerting = 0
if alerting and isinstance(alerting, list):
_num_alerting = len(alerting)
return {
"alerting": _alerting,
"litellm.callbacks": litellm_callbacks,
"litellm.input_callback": litellm_input_callbacks,
"litellm.failure_callback": litellm_failure_callbacks,
"litellm.success_callback": litellm_success_callbacks,
"litellm._async_success_callback": litellm_async_success_callbacks,
"litellm._async_failure_callback": litellm_async_failure_callbacks,
"litellm._async_input_callback": litellm_async_input_callbacks,
"all_litellm_callbacks": all_litellm_callbacks,
"num_callbacks": len(all_litellm_callbacks),
"num_alerting": _num_alerting,
}
@router.get(
"/health/readiness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness():
"""
Endpoint for checking whether the worker can receive requests (readiness probe).
"""
from litellm.proxy.proxy_server import proxy_logging_obj, prisma_client, version
try:
# get success callback
success_callback_names = []
try:
# this was returning a JSON of the values in some of the callbacks
# all we need is the callback name, hence we do str(callback)
success_callback_names = [str(x) for x in litellm.success_callback]
except:
# don't let this block the /health/readiness response, if we can't convert to str -> return litellm.success_callback
success_callback_names = litellm.success_callback
# check Cache
cache_type = None
if litellm.cache is not None:
from litellm.caching import RedisSemanticCache
cache_type = litellm.cache.type
if isinstance(litellm.cache.cache, RedisSemanticCache):
# ping the cache
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
try:
index_info = await litellm.cache.cache._index_info()
except Exception as e:
index_info = "index does not exist - error: " + str(e)
cache_type = {"type": cache_type, "index_info": index_info}
# check DB
if prisma_client is not None: # if db passed in, check if it's connected
db_health_status = _db_health_readiness_check()
return {
"status": "healthy",
"db": "connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
**db_health_status,
}
else:
return {
"status": "healthy",
"db": "Not connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
}
except Exception as e:
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
@router.get(
"/health/liveliness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_liveliness():
"""
Endpoint for checking whether the worker is alive (liveness probe).
"""
return "I'm alive!"

View file

@ -0,0 +1,926 @@
"""
KEY MANAGEMENT
All /key management endpoints
/key/generate
/key/info
/key/update
/key/delete
"""
import copy
import json
import uuid
import re
import traceback
import asyncio
import secrets
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import litellm
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import *
router = APIRouter()
@router.post(
"/key/generate",
tags=["key management"],
dependencies=[Depends(user_api_key_auth)],
response_model=GenerateKeyResponse,
)
async def generate_key_fn(
data: GenerateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Generate an API key based on the provided data.
Docs: https://docs.litellm.ai/docs/proxy/virtual_keys
Parameters:
- duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
- key_alias: Optional[str] - User defined key alias
- team_id: Optional[str] - The team id of the key
- user_id: Optional[str] - The user id of the key
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
- config: Optional[dict] - any key-specific configs, overrides config in config.yaml
- spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- send_invite_email: Optional[bool] - Whether to send an invite email to the user_id, with the generate key
- max_budget: Optional[float] - Specify max budget for a given key.
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
Examples:
1. Allow users to turn on/off pii masking
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
Returns:
- key: (str) The generated api key
- expires: (datetime) Datetime object for when key expires.
- user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id.
"""
try:
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
verbose_proxy_logger.debug("entered /key/generate")
if user_custom_key_generate is not None:
result = await user_custom_key_generate(data)
decision = result.get("decision", True)
message = result.get("message", "Authentication Failed - Custom Auth Rule")
if not decision:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN, detail=message
)
# check if user set default key/generate params on config.yaml
if litellm.default_key_generate_params is not None:
for elem in data:
key, value = elem
if value is None and key in [
"max_budget",
"user_id",
"team_id",
"max_parallel_requests",
"tpm_limit",
"rpm_limit",
"budget_duration",
]:
setattr(
data, key, litellm.default_key_generate_params.get(key, None)
)
elif key == "models" and value == []:
setattr(data, key, litellm.default_key_generate_params.get(key, []))
elif key == "metadata" and value == {}:
setattr(data, key, litellm.default_key_generate_params.get(key, {}))
# check if user set default key/generate params on config.yaml
if litellm.upperbound_key_generate_params is not None:
for elem in data:
# if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
key, value = elem
if (
value is not None
and getattr(litellm.upperbound_key_generate_params, key, None)
is not None
):
# if value is float/int
if key in [
"max_budget",
"max_parallel_requests",
"tpm_limit",
"rpm_limit",
]:
if value > getattr(litellm.upperbound_key_generate_params, key):
raise HTTPException(
status_code=400,
detail={
"error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
},
)
elif key == "budget_duration":
# budget durations are strings like 30s, 30m, 30h, 30d
# compare the duration in seconds and max duration in seconds
upperbound_budget_duration = _duration_in_seconds(
duration=getattr(
litellm.upperbound_key_generate_params, key
)
)
user_set_budget_duration = _duration_in_seconds(duration=value)
if user_set_budget_duration > upperbound_budget_duration:
raise HTTPException(
status_code=400,
detail={
"error": f"Budget duration is over max limit set in config - user_value={user_set_budget_duration}; max_value={upperbound_budget_duration}"
},
)
# TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
_budget_id = None
if prisma_client is not None and data.soft_budget is not None:
# create the Budget Row for the LiteLLM Verification Token
budget_row = LiteLLM_BudgetTable(
soft_budget=data.soft_budget,
model_max_budget=data.model_max_budget or {},
)
new_budget = prisma_client.jsonify_object(
budget_row.json(exclude_none=True)
)
_budget = await prisma_client.db.litellm_budgettable.create(
data={
**new_budget, # type: ignore
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
_budget_id = getattr(_budget, "budget_id", None)
data_json = data.json() # type: ignore
# if max_budget is passed to /key/generate, use it as key_max_budget, since generate_key_helper_fn is also used to create new users
if "max_budget" in data_json:
data_json["key_max_budget"] = data_json.pop("max_budget", None)
if _budget_id is not None:
data_json["budget_id"] = _budget_id
if "budget_duration" in data_json:
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
response = await generate_key_helper_fn(
request_type="key", **data_json, table_name="key"
)
response["soft_budget"] = (
data.soft_budget
) # include the user-input soft budget in the response
if data.send_invite_email is True:
if "email" not in general_settings.get("alerting", []):
raise ValueError(
"Email alerting not setup on config.yaml. Please set `alerting=['email']. \nDocs: https://docs.litellm.ai/docs/proxy/email`"
)
event = WebhookEvent(
event="key_created",
event_group="key",
event_message=f"API Key Created",
token=response.get("token", ""),
spend=response.get("spend", 0.0),
max_budget=response.get("max_budget", 0.0),
user_id=response.get("user_id", None),
team_id=response.get("team_id", "Default Team"),
key_alias=response.get("key_alias", None),
)
# If user configured email alerting - send an Email letting their end-user know the key was created
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
webhook_event=event,
)
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=response.get("token_id", ""),
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
return GenerateKeyResponse(**response)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(
request: Request,
data: UpdateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Update an existing key
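Example (illustrative request - the key value and fields shown are placeholders):
```bash
curl --location 'http://0.0.0.0:8000/key/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"key": "sk-1234",
"models": ["gpt-3.5-turbo"],
"metadata": {"team": "core-infra"}
}'
```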
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
user_api_key_cache,
)
try:
data_json: dict = data.json()
key = data_json.pop("key")
# get the row from db
if prisma_client is None:
raise Exception("Not connected to DB!")
existing_key_row = await prisma_client.get_data(
token=data.key, table_name="key", query_type="find_unique"
)
if existing_key_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
# get non default values for key
non_default_values = {}
for k, v in data_json.items():
if v is not None and v not in (
[],
{},
0,
): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v
if "duration" in non_default_values:
duration = non_default_values.pop("duration")
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
non_default_values["expires"] = expires
response = await prisma_client.update_data(
token=key, data={**non_default_values, "token": key}
)
# Delete - key from cache, since it's been updated!
# key updated - a new model could have been added to this key. it should not block requests after this is done
user_api_key_cache.delete_cache(key)
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json, default=str)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=data.key,
action="updated",
updated_values=_updated_values,
before_value=_before_value,
)
)
)
return {"key": key, **response["data"]}
# update based on remaining passed in values
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/key/delete", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def delete_key_fn(
data: KeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Delete a key from the key management system.
Parameters:
- keys (List[str]): A list of keys or hashed keys to delete. Example {"keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}
Returns:
- deleted_keys (List[str]): A list of deleted keys. Example {"deleted_keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}
Raises:
HTTPException: If an error occurs during key deletion.
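Example (illustrative request, using the `keys` parameter documented above):
```bash
curl --location 'http://0.0.0.0:8000/key/delete' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"keys": ["sk-QWrxEynunsNpV1zT48HIrw"]
}'
```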
"""
try:
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
user_api_key_cache,
)
keys = data.keys
if len(keys) == 0:
raise ProxyException(
message=f"No keys provided, passed in: keys={keys}",
type="auth_error",
param="keys",
code=status.HTTP_400_BAD_REQUEST,
)
## only allow user to delete keys they own
user_id = user_api_key_dict.user_id
verbose_proxy_logger.debug(
f"user_api_key_dict.user_role: {user_api_key_dict.user_role}"
)
if (
user_api_key_dict.user_role is not None
and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
):
user_id = None # unless they're admin
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each key deleted
for key in data.keys:
key_row = await prisma_client.get_data( # type: ignore
token=key, table_name="key", query_type="find_unique"
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=key,
action="deleted",
updated_values="{}",
before_value=_key_row,
)
)
)
number_deleted_keys = await delete_verification_token(
tokens=keys, user_id=user_id
)
verbose_proxy_logger.debug(
f"/key/delete - deleted_keys={number_deleted_keys['deleted_keys']}"
)
try:
assert len(keys) == number_deleted_keys["deleted_keys"]
except Exception as e:
raise HTTPException(
status_code=400,
detail={
"error": f"Not all keys passed in were deleted. This probably means you don't have access to delete all the keys passed in. Keys passed in={len(keys)}, Deleted keys ={number_deleted_keys['deleted_keys']}"
},
)
for key in keys:
user_api_key_cache.delete_cache(key)
# remove hash token from cache
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
verbose_proxy_logger.debug(
f"/keys/delete - cache after delete: {user_api_key_cache.in_memory_cache.cache_dict}"
)
return {"deleted_keys": keys}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/v2/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn_v2(
data: Optional[KeyRequest] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Retrieve information about a list of keys.
**New endpoint**. Currently admin only.
Parameters:
keys: Optional[list] = body parameter representing the key(s) in the request
user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
Returns:
Dict containing the key and its associated information
Example Curl:
```
curl -X GET "http://0.0.0.0:8000/key/info" \
-H "Authorization: Bearer sk-1234" \
-d {"keys": ["sk-1", "sk-2", "sk-3"]}
```
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
try:
if prisma_client is None:
raise Exception(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
)
if data is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"message": "Malformed request. No keys passed in."},
)
key_info = await prisma_client.get_data(
token=data.keys, table_name="key", query_type="find_all"
)
filtered_key_info = []
for k in key_info:
try:
k = k.model_dump() # noqa
except:
# if using pydantic v1
k = k.dict()
filtered_key_info.append(k)
return {"key": data.keys, "info": filtered_key_info}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.get(
"/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn(
key: Optional[str] = fastapi.Query(
default=None, description="Key in the request parameters"
),
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Retrieve information about a key.
Parameters:
key: Optional[str] = Query parameter representing the key in the request
user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
Returns:
Dict containing the key and its associated information
Example Curl:
```
curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
-H "Authorization: Bearer sk-1234"
```
Example Curl - if no key is passed, it will use the Key Passed in Authorization Header
```
curl -X GET "http://0.0.0.0:8000/key/info" \
-H "Authorization: Bearer sk-02Wr4IAlN3NvPXvL5JVvDA"
```
"""
from litellm.proxy.proxy_server import (
user_custom_key_generate,
prisma_client,
litellm_proxy_admin_name,
general_settings,
proxy_logging_obj,
create_audit_log_for_update,
)
try:
if prisma_client is None:
raise Exception(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
)
if key is None:
key = user_api_key_dict.api_key
key_info = await prisma_client.get_data(token=key)
## REMOVE HASHED TOKEN INFO BEFORE RETURNING ##
try:
key_info = key_info.model_dump() # noqa
except:
# if using pydantic v1
key_info = key_info.dict()
key_info.pop("token")
return {"key": key, "info": key_info}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
def _duration_in_seconds(duration: str):
match = re.match(r"(\d+)([smhd]?)", duration)
if not match:
raise ValueError("Invalid duration format")
value, unit = match.groups()
value = int(value)
if unit == "s":
return value
elif unit == "m":
return value * 60
elif unit == "h":
return value * 3600
elif unit == "d":
return value * 86400
else:
raise ValueError("Unsupported duration unit")
async def generate_key_helper_fn(
request_type: Literal[
"user", "key"
], # identifies if this request is from /user/new or /key/generate
duration: Optional[str],
models: list,
aliases: dict,
config: dict,
spend: float,
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
key_budget_duration: Optional[str] = None,
budget_id: Optional[float] = None, # budget id <-> LiteLLM_BudgetTable
soft_budget: Optional[
float
] = None, # soft_budget is used to set soft Budgets Per user
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
budget_duration: Optional[str] = None,  # how often the user-level budget resets
token: Optional[str] = None,
user_id: Optional[str] = None,
team_id: Optional[str] = None,
user_email: Optional[str] = None,
user_role: Optional[str] = None,
max_parallel_requests: Optional[int] = None,
metadata: Optional[dict] = {},
tpm_limit: Optional[int] = None,
rpm_limit: Optional[int] = None,
query_type: Literal["insert_data", "update_data"] = "insert_data",
update_key_values: Optional[dict] = None,
key_alias: Optional[str] = None,
allowed_cache_controls: Optional[list] = [],
permissions: Optional[dict] = {},
model_max_budget: Optional[dict] = {},
teams: Optional[list] = None,
organization_id: Optional[str] = None,
table_name: Optional[Literal["key", "user"]] = None,
send_invite_email: Optional[bool] = None,
):
from litellm.proxy.proxy_server import (
prisma_client,
custom_db_client,
litellm_proxy_budget_name,
premium_user,
)
if prisma_client is None and custom_db_client is None:
raise Exception(
f"Connect Proxy to database to generate keys - https://docs.litellm.ai/docs/proxy/virtual_keys "
)
if token is None:
token = f"sk-{secrets.token_urlsafe(16)}"
if duration is None: # allow tokens that never expire
expires = None
else:
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
if key_budget_duration is None: # one-time budget
key_reset_at = None
else:
duration_s = _duration_in_seconds(duration=key_budget_duration)
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
if budget_duration is None: # one-time budget
reset_at = None
else:
duration_s = _duration_in_seconds(duration=budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
aliases_json = json.dumps(aliases)
config_json = json.dumps(config)
permissions_json = json.dumps(permissions)
metadata_json = json.dumps(metadata)
model_max_budget_json = json.dumps(model_max_budget)
user_role = user_role
tpm_limit = tpm_limit
rpm_limit = rpm_limit
allowed_cache_controls = allowed_cache_controls
try:
# Create a new verification token (you may want to enhance this logic based on your needs)
user_data = {
"max_budget": max_budget,
"user_email": user_email,
"user_id": user_id,
"team_id": team_id,
"organization_id": organization_id,
"user_role": user_role,
"spend": spend,
"models": models,
"max_parallel_requests": max_parallel_requests,
"tpm_limit": tpm_limit,
"rpm_limit": rpm_limit,
"budget_duration": budget_duration,
"budget_reset_at": reset_at,
"allowed_cache_controls": allowed_cache_controls,
}
if teams is not None:
user_data["teams"] = teams
key_data = {
"token": token,
"key_alias": key_alias,
"expires": expires,
"models": models,
"aliases": aliases_json,
"config": config_json,
"spend": spend,
"max_budget": key_max_budget,
"user_id": user_id,
"team_id": team_id,
"max_parallel_requests": max_parallel_requests,
"metadata": metadata_json,
"tpm_limit": tpm_limit,
"rpm_limit": rpm_limit,
"budget_duration": key_budget_duration,
"budget_reset_at": key_reset_at,
"allowed_cache_controls": allowed_cache_controls,
"permissions": permissions_json,
"model_max_budget": model_max_budget_json,
"budget_id": budget_id,
}
if (
litellm.get_secret("DISABLE_KEY_NAME", False) == True
): # allow user to disable storing abbreviated key name (shown in UI, to help figure out which key spent how much)
pass
else:
key_data["key_name"] = f"sk-...{token[-4:]}"
saved_token = copy.deepcopy(key_data)
if isinstance(saved_token["aliases"], str):
saved_token["aliases"] = json.loads(saved_token["aliases"])
if isinstance(saved_token["config"], str):
saved_token["config"] = json.loads(saved_token["config"])
if isinstance(saved_token["metadata"], str):
saved_token["metadata"] = json.loads(saved_token["metadata"])
if isinstance(saved_token["permissions"], str):
if (
"get_spend_routes" in saved_token["permissions"]
and premium_user != True
):
raise ValueError(
"get_spend_routes permission is only available for LiteLLM Enterprise users"
)
saved_token["permissions"] = json.loads(saved_token["permissions"])
if isinstance(saved_token["model_max_budget"], str):
saved_token["model_max_budget"] = json.loads(
saved_token["model_max_budget"]
)
if saved_token.get("expires", None) is not None and isinstance(
saved_token["expires"], datetime
):
saved_token["expires"] = saved_token["expires"].isoformat()
if prisma_client is not None:
if (
table_name is None or table_name == "user"
): # do not auto-create users for `/key/generate`
## CREATE USER (If necessary)
if query_type == "insert_data":
user_row = await prisma_client.insert_data(
data=user_data, table_name="user"
)
## use default user model list if no key-specific model list provided
if len(user_row.models) > 0 and len(key_data["models"]) == 0: # type: ignore
key_data["models"] = user_row.models
elif query_type == "update_data":
user_row = await prisma_client.update_data(
data=user_data,
table_name="user",
update_key_values=update_key_values,
)
if user_id == litellm_proxy_budget_name or (
table_name is not None and table_name == "user"
):
# do not create a key for litellm_proxy_budget_name or if table name is set to just 'user'
# we only need to ensure this exists in the user table
# the LiteLLM_VerificationToken table will increase in size if we don't do this check
return user_data
## CREATE KEY
verbose_proxy_logger.debug("prisma_client: Creating Key= %s", key_data)
create_key_response = await prisma_client.insert_data(
data=key_data, table_name="key"
)
key_data["token_id"] = getattr(create_key_response, "token", None)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise e
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={"error": "Internal Server Error."},
)
# Add budget related info in key_data - this ensures it's returned
key_data["budget_id"] = budget_id
if request_type == "user":
# if this is a /user/new request, update key_data with the user_data fields
key_data.update(user_data)
return key_data
async def delete_verification_token(tokens: List, user_id: Optional[str] = None):
from litellm.proxy.proxy_server import prisma_client, litellm_proxy_admin_name
try:
if prisma_client:
# Assuming 'db' is your Prisma Client instance
# check if admin making request - don't filter by user-id
if user_id == litellm_proxy_admin_name:
deleted_tokens = await prisma_client.delete_data(tokens=tokens)
# else
else:
deleted_tokens = await prisma_client.delete_data(
tokens=tokens, user_id=user_id
)
_num_deleted_tokens = deleted_tokens.get("deleted_keys", 0)
if _num_deleted_tokens != len(tokens):
raise Exception(
"Failed to delete all tokens. Tried to delete tokens that don't belong to user: "
+ str(user_id)
)
else:
raise Exception("DB not connected. prisma_client is None")
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
return deleted_tokens


@@ -0,0 +1,899 @@
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import copy
import json
import uuid
import litellm
import asyncio
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
UserAPIKeyAuth,
LiteLLM_TeamTable,
LiteLLM_ModelTable,
LitellmUserRoles,
NewTeamRequest,
TeamMemberAddRequest,
UpdateTeamRequest,
BlockTeamRequest,
DeleteTeamRequest,
Member,
LitellmTableNames,
LiteLLM_AuditLogs,
TeamMemberDeleteRequest,
ProxyException,
CommonProxyErrors,
)
from litellm.proxy.management_helpers.utils import (
add_new_member,
management_endpoint_wrapper,
)
router = APIRouter()
#### TEAM MANAGEMENT ####
@router.post(
"/team/new",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
data: NewTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Allow users to create a new team. Apply user permissions to their team.
👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
Parameters:
- team_alias: Optional[str] - User defined team alias
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
Returns:
- team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id.
_deprecated_params:
- admins: list - A list of user_id's for the admin role
- users: list - A list of user_id's for the user role
Example Request:
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "my-new-team_2",
"members_with_roles": [{"role": "admin", "user_id": "user-1234"},
{"role": "user", "user_id": "user-2434"}]
}'
```
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "QA Prod Bot",
"max_budget": 0.000000001,
"budget_duration": "1d"
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
data.team_id = str(uuid.uuid4())
else:
# Check if team_id exists already
_existing_team_id = await prisma_client.get_data(
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if _existing_team_id is not None:
raise HTTPException(
status_code=400,
detail={
"error": f"Team id = {data.team_id} already exists. Please use a different team id."
},
)
if (
user_api_key_dict.user_role is None
or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
): # don't restrict proxy admin
if (
data.tpm_limit is not None
and user_api_key_dict.tpm_limit is not None
and data.tpm_limit > user_api_key_dict.tpm_limit
):
raise HTTPException(
status_code=400,
detail={
"error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}. User role={user_api_key_dict.user_role}"
},
)
if (
data.rpm_limit is not None
and user_api_key_dict.rpm_limit is not None
and data.rpm_limit > user_api_key_dict.rpm_limit
):
raise HTTPException(
status_code=400,
detail={
"error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}. User role={user_api_key_dict.user_role}"
},
)
if (
data.max_budget is not None
and user_api_key_dict.max_budget is not None
and data.max_budget > user_api_key_dict.max_budget
):
raise HTTPException(
status_code=400,
detail={
"error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}. User role={user_api_key_dict.user_role}"
},
)
if data.models is not None and len(user_api_key_dict.models) > 0:
for m in data.models:
if m not in user_api_key_dict.models:
raise HTTPException(
status_code=400,
detail={
"error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}. User id={user_api_key_dict.user_id}"
},
)
if user_api_key_dict.user_id is not None:
creating_user_in_list = False
for member in data.members_with_roles:
if member.user_id == user_api_key_dict.user_id:
creating_user_in_list = True
if creating_user_in_list == False:
data.members_with_roles.append(
Member(role="admin", user_id=user_api_key_dict.user_id)
)
## ADD TO MODEL TABLE
_model_id = None
if data.model_aliases is not None and isinstance(data.model_aliases, dict):
litellm_modeltable = LiteLLM_ModelTable(
model_aliases=json.dumps(data.model_aliases),
created_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
updated_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
)
model_dict = await prisma_client.db.litellm_modeltable.create(
{**litellm_modeltable.json(exclude_none=True)} # type: ignore
) # type: ignore
_model_id = model_dict.id
## ADD TO TEAM TABLE
complete_team_data = LiteLLM_TeamTable(
**data.json(),
model_id=_model_id,
)
# If budget_duration is set, set `budget_reset_at`
if complete_team_data.budget_duration is not None:
duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
complete_team_data.budget_reset_at = reset_at
team_row = await prisma_client.insert_data(
data=complete_team_data.json(exclude_none=True), table_name="team"
)
## ADD TEAM ID TO USER TABLE ##
for user in complete_team_data.members_with_roles:
## add team id to user row ##
await prisma_client.update_data(
user_id=user.user_id,
data={"user_id": user.user_id, "teams": [team_row.team_id]},
update_key_values_custom_query={
"teams": {
"push ": [team_row.team_id],
}
},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
try:
return team_row.model_dump()
except Exception as e:
return team_row.dict()
@router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
data: UpdateTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
Use `/team/member_add` AND `/team/member_delete` to add/remove team members
You can now update team budget / rate limits via /team/update
Parameters:
- team_id: str - The team id of the user. Required param.
- team_alias: Optional[str] - User defined team alias
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
Example - update team TPM Limit
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"tpm_limit": 100
}'
```
Example - Update Team `max_budget`
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"max_budget": 10
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
verbose_proxy_logger.debug("/team/update - %s", data)
existing_team_row = await prisma_client.get_data(
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
updated_kv = data.json(exclude_none=True)
# Check budget_duration and budget_reset_at
if data.budget_duration is not None:
duration_s = _duration_in_seconds(duration=data.budget_duration)
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
# set the budget_reset_at in DB
updated_kv["budget_reset_at"] = reset_at
team_row = await prisma_client.update_data(
update_key_values=updated_kv,
data=updated_kv,
table_name="team",
team_id=data.team_id,
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value, default=str)
_after_value: str = json.dumps(updated_kv, default=str)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="updated",
updated_values=_after_value,
before_value=_before_value,
)
)
)
return team_row
@router.post(
"/team/member_add",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
data: TeamMemberAddRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[BETA]
Add new members (either via user_email or user_id) to a team
If the user doesn't exist, a new user row will also be added to the User Table
```
curl -X POST 'http://0.0.0.0:4000/team/member_add' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": {"role": "user", "user_id": "krrish247652@berri.ai"}}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.member is None:
raise HTTPException(
status_code=400, detail={"error": "No member/members passed in"}
)
existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={
"error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
},
)
complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())
if isinstance(data.member, Member):
# add to team db
new_member = data.member
complete_team_data.members_with_roles.append(new_member)
elif isinstance(data.member, List):
# add to team db
new_members = data.member
complete_team_data.members_with_roles.extend(new_members)
# ADD MEMBER TO TEAM
_db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
updated_team = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data={"members_with_roles": json.dumps(_db_team_members)}, # type: ignore
)
if isinstance(data.member, Member):
await add_new_member(
new_member=data.member,
max_budget_in_team=data.max_budget_in_team,
prisma_client=prisma_client,
user_api_key_dict=user_api_key_dict,
litellm_proxy_admin_name=litellm_proxy_admin_name,
team_id=data.team_id,
)
elif isinstance(data.member, List):
tasks: List = []
for m in data.member:
await add_new_member(
new_member=m,
max_budget_in_team=data.max_budget_in_team,
prisma_client=prisma_client,
user_api_key_dict=user_api_key_dict,
litellm_proxy_admin_name=litellm_proxy_admin_name,
team_id=data.team_id,
)
await asyncio.gather(*tasks)
return updated_team
@router.post(
"/team/member_delete",
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
data: TeamMemberDeleteRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[BETA]
delete members (either via user_email or user_id) from a team
If user doesn't exist, an exception will be raised
```
curl -X POST 'http://0.0.0.0:8000/team/member_delete' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
"user_id": "krrish247652@berri.ai"
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_id is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.user_id is None and data.user_email is None:
raise HTTPException(
status_code=400,
detail={"error": "Either user_id or user_email needs to be passed in"},
)
_existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if _existing_team_row is None:
raise HTTPException(
status_code=400,
detail={"error": "Team id={} does not exist in db".format(data.team_id)},
)
existing_team_row = LiteLLM_TeamTable(**_existing_team_row.model_dump())
## DELETE MEMBER FROM TEAM
new_team_members: List[Member] = []
for m in existing_team_row.members_with_roles:
if (
data.user_id is not None
and m.user_id is not None
and data.user_id == m.user_id
):
continue
elif (
data.user_email is not None
and m.user_email is not None
and data.user_email == m.user_email
):
continue
new_team_members.append(m)
existing_team_row.members_with_roles = new_team_members
_db_new_team_members: List[dict] = [m.model_dump() for m in new_team_members]
_ = await prisma_client.db.litellm_teamtable.update(
where={
"team_id": data.team_id,
},
data={"members_with_roles": json.dumps(_db_new_team_members)}, # type: ignore
)
## DELETE TEAM ID from USER ROW, IF EXISTS ##
# get user row
key_val = {}
if data.user_id is not None:
key_val["user_id"] = data.user_id
elif data.user_email is not None:
key_val["user_email"] = data.user_email
existing_user_rows = await prisma_client.db.litellm_usertable.find_many(
where=key_val # type: ignore
)
if existing_user_rows is not None and (
isinstance(existing_user_rows, list) and len(existing_user_rows) > 0
):
for existing_user in existing_user_rows:
team_list = []
if data.team_id in existing_user.teams:
team_list = existing_user.teams
team_list.remove(data.team_id)
await prisma_client.db.litellm_usertable.update(
where={
"user_id": existing_user.user_id,
},
data={"teams": {"set": team_list}},
)
return existing_team_row
@router.post(
"/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
data: DeleteTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
),
):
"""
delete team and associated team keys
```
curl --location 'http://0.0.0.0:8000/team/delete' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_ids": ["45e3e396-ee08-4a61-a88e-16b3ce7e0849"]
}'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data.team_ids is None:
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
# check that all teams passed exist
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
if team_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={team_id}"},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each team deleted
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
_team_row = team_row.json(exclude_none=True)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=litellm_changed_by
or user_api_key_dict.user_id
or litellm_proxy_admin_name,
changed_by_api_key=user_api_key_dict.api_key,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=team_id,
action="deleted",
updated_values="{}",
before_value=_team_row,
)
)
)
# End of Audit logging
## DELETE ASSOCIATED KEYS
await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
## DELETE TEAMS
deleted_teams = await prisma_client.delete_data(
team_id_list=data.team_ids, table_name="team"
)
return deleted_teams
@router.get(
"/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
http_request: Request,
team_id: str = fastapi.Query(
default=None, description="Team ID in the request parameters"
),
):
"""
get info on team + related keys
```
curl --location 'http://localhost:4000/team/info?team_id=<team-id>' \
--header 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
try:
if prisma_client is None:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={
"error": f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
},
)
if team_id is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"message": "Malformed request. No team id passed in."},
)
team_info = await prisma_client.get_data(
team_id=team_id, table_name="team", query_type="find_unique"
)
if team_info is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"message": f"Team not found, passed team id: {team_id}."},
)
## GET ALL KEYS ##
keys = await prisma_client.get_data(
team_id=team_id,
table_name="key",
query_type="find_all",
expires=datetime.now(),
)
if team_info is None:
## make sure we still return a total spend ##
spend = 0
for k in keys:
spend += getattr(k, "spend", 0)
team_info = {"spend": spend}
## REMOVE HASHED TOKEN INFO before returning ##
for key in keys:
try:
key = key.model_dump() # noqa
except:
# if using pydantic v1
key = key.dict()
key.pop("token", None)
return {"team_id": team_id, "team_info": team_info, "keys": keys}
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
type="auth_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="Authentication Error, " + str(e),
type="auth_error",
param=getattr(e, "param", "None"),
code=status.HTTP_400_BAD_REQUEST,
)
@router.post(
"/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Blocks all calls from keys with this team id.
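Example (illustrative request; the team_id is a placeholder):
```
curl --location 'http://0.0.0.0:4000/team/block' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{"team_id": "litellm-test-client-id-new"}'
```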
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": True} # type: ignore
)
return record
@router.post(
"/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Unblocks all calls from keys with this team id.
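Example (illustrative request; the team_id is a placeholder):
```
curl --location 'http://0.0.0.0:4000/team/unblock' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{"team_id": "litellm-test-client-id-new"}'
```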
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if prisma_client is None:
raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": False} # type: ignore
)
return record
@router.get(
"/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[Admin-only] List all available teams
```
curl --location --request GET 'http://0.0.0.0:4000/team/list' \
--header 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import (
prisma_client,
litellm_proxy_admin_name,
create_audit_log_for_update,
_duration_in_seconds,
)
if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN:
raise HTTPException(
status_code=401,
detail={
"error": "Admin-only endpoint. Your user role={}".format(
user_api_key_dict.user_role
)
},
)
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
response = await prisma_client.db.litellm_teamtable.find_many()
return response


@@ -1,5 +1,11 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm._logging import verbose_logger
from fastapi import Request
from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient
@@ -61,3 +67,110 @@ async def add_new_member(
"budget_id": _budget_id,
}
)
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
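Usage sketch (illustrative - mirrors how the team management endpoints apply it):
```
@router.post("/team/new")
@management_endpoint_wrapper
async def new_team(...):
...
```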
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
try:
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
_http_request: Request = kwargs.get("http_request")
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
if _http_request:
_route = _http_request.url.path
# Flush user_api_key cache if this was an update/delete call to /key, /team, or /user
if _route in [
"/key/update",
"/key/delete",
"/team/update",
"/team/delete",
"/user/update",
"/user/delete",
"/customer/update",
"/customer/delete",
]:
from litellm.proxy.proxy_server import user_api_key_cache
user_api_key_cache.flush_cache()
except Exception as e:
# Non-Blocking Exception
verbose_logger.debug("Error in management endpoint wrapper: %s", str(e))
pass
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper


@@ -20,6 +20,7 @@ model_list:
general_settings:
master_key: sk-1234
alerting: ["slack", "email"]
litellm_settings:
success_callback: ["prometheus"]

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -12,6 +12,8 @@ import litellm
import backoff
import traceback
from pydantic import BaseModel
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm.proxy._types import (
UserAPIKeyAuth,
DynamoDBArgs,
@@ -266,7 +268,9 @@ class ProxyLogging:
+ litellm.failure_callback
)
)
litellm.litellm_core_utils.litellm_logging.set_callbacks(
callback_list=callback_list
)
# The actual implementation of the function
async def pre_call_hook(
@@ -331,7 +335,9 @@ class ProxyLogging:
return data
except Exception as e:
if "litellm_logging_obj" in data:
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
"litellm_logging_obj"
]
## ASYNC FAILURE HANDLER ##
error_message = ""
@@ -455,6 +461,7 @@ class ProxyLogging:
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
extra_kwargs = {}
alerting_metadata = {}
if request_data is not None:
_url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
request_data=request_data
@@ -462,7 +469,12 @@ class ProxyLogging:
if _url is not None:
extra_kwargs["🪢 Langfuse Trace"] = _url
formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)
if (
"metadata" in request_data
and request_data["metadata"].get("alerting_metadata", None) is not None
and isinstance(request_data["metadata"]["alerting_metadata"], dict)
):
alerting_metadata = request_data["metadata"]["alerting_metadata"]
for client in self.alerting:
if client == "slack":
await self.slack_alerting_instance.send_alert(
@@ -470,6 +482,7 @@ class ProxyLogging:
level=level,
alert_type=alert_type,
user_info=None,
alerting_metadata=alerting_metadata,
**extra_kwargs,
)
elif client == "sentry":
@@ -510,7 +523,7 @@ class ProxyLogging:
)
if hasattr(self, "service_logging_obj"):
await self.service_logging_obj.async_service_failure_hook(
service=ServiceTypes.DB,
duration=duration,
error=error_message,
@@ -1960,6 +1973,9 @@ async def send_email(receiver_email, subject, html):
email_message["From"] = sender_email
email_message["To"] = receiver_email
email_message["Subject"] = subject
verbose_proxy_logger.debug(
"sending email from %s to %s", sender_email, receiver_email
)
# Attach the body to the email
email_message.attach(MIMEText(html, "html"))
@@ -2115,6 +2131,16 @@ def _extract_from_regex(duration: str) -> Tuple[int, str]:
return value, unit
def get_last_day_of_month(year, month):
# Handle December case
if month == 12:
return 31
# First day of the next month minus one day gives the last day of this month
next_month = datetime(year=year, month=month + 1, day=1)
last_day_of_month = (next_month - timedelta(days=1)).day
return last_day_of_month
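# Illustrative: get_last_day_of_month(2024, 2) -> 29, get_last_day_of_month(2024, 12) -> 31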
def _duration_in_seconds(duration: str) -> int: def _duration_in_seconds(duration: str) -> int:
""" """
Parameters: Parameters:
@ -2141,12 +2167,28 @@ def _duration_in_seconds(duration: str) -> int:
now = time.time() now = time.time()
current_time = datetime.fromtimestamp(now) current_time = datetime.fromtimestamp(now)
# Calculate the first day of the next month
if current_time.month == 12: if current_time.month == 12:
next_month = datetime(year=current_time.year + 1, month=1, day=1) target_year = current_time.year + 1
target_month = 1
else: else:
target_year = current_time.year
target_month = current_time.month + value
# Determine the day to set for next month
target_day = current_time.day
last_day_of_target_month = get_last_day_of_month(target_year, target_month)
if target_day > last_day_of_target_month:
target_day = last_day_of_target_month
next_month = datetime( next_month = datetime(
year=current_time.year, month=current_time.month + value, day=1 year=target_year,
month=target_month,
day=target_day,
hour=current_time.hour,
minute=current_time.minute,
second=current_time.second,
microsecond=current_time.microsecond,
) )
# Calculate the duration until the first day of the next month # Calculate the duration until the first day of the next month
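To make the clamping above concrete, a hedged standalone sketch with a hypothetical anchor date: adding one month to Jan 31 would land on a non-existent Feb 31, so the target day is pulled back to the month's last day (Feb 29 in the 2024 leap year):

    from datetime import datetime
    from litellm.proxy.utils import get_last_day_of_month  # illustrative import path

    current_time = datetime(2024, 1, 31, 12, 0, 0)   # hypothetical "now"
    target_year, target_month = 2024, 2              # one month ahead
    target_day = min(
        current_time.day, get_last_day_of_month(target_year, target_month)
    )
    next_month = datetime(
        target_year, target_month, target_day,
        current_time.hour, current_time.minute,
        current_time.second, current_time.microsecond,
    )
    # next_month == datetime(2024, 2, 29, 12, 0)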
@ -2718,47 +2760,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return return
def _is_user_proxy_admin(user_id_information: Optional[list]):
if user_id_information is None:
return False
if len(user_id_information) == 0 or user_id_information[0] is None:
return False
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
# if user_id_information contains litellm-proxy-budget
# get first user_id that is not litellm-proxy-budget
for user in user_id_information:
if user.get("user_id") != "litellm-proxy-budget":
_user = user
break
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
return False
def _get_user_role(user_id_information: Optional[list]):
if user_id_information is None:
return None
if len(user_id_information) == 0 or user_id_information[0] is None:
return None
_user = user_id_information[0]
return _user.get("user_role")
def encrypt_value(value: str, master_key: str): def encrypt_value(value: str, master_key: str):
import hashlib import hashlib
import nacl.secret import nacl.secret

View file

@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
) )
from litellm.scheduler import Scheduler, FlowItem from litellm.scheduler import Scheduler, FlowItem
from typing import Iterable from typing import Iterable
from litellm.router_utils.handle_error import send_llm_exception_alert
class Router: class Router:
@ -108,6 +109,7 @@ class Router:
] = None, # generic fallbacks, works across all deployments ] = None, # generic fallbacks, works across all deployments
fallbacks: List = [], fallbacks: List = [],
context_window_fallbacks: List = [], context_window_fallbacks: List = [],
content_policy_fallbacks: List = [],
model_group_alias: Optional[dict] = {}, model_group_alias: Optional[dict] = {},
enable_pre_call_checks: bool = False, enable_pre_call_checks: bool = False,
retry_after: int = 0, # min time to wait before retrying a failed request retry_after: int = 0, # min time to wait before retrying a failed request
@ -311,6 +313,12 @@ class Router:
self.context_window_fallbacks = ( self.context_window_fallbacks = (
context_window_fallbacks or litellm.context_window_fallbacks context_window_fallbacks or litellm.context_window_fallbacks
) )
_content_policy_fallbacks = (
content_policy_fallbacks or litellm.content_policy_fallbacks
)
self.validate_fallbacks(fallback_param=_content_policy_fallbacks)
self.content_policy_fallbacks = _content_policy_fallbacks
self.total_calls: defaultdict = defaultdict( self.total_calls: defaultdict = defaultdict(
int int
) # dict to store total calls made to each model ) # dict to store total calls made to each model
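A hedged configuration sketch for the new parameter; the model names and fallback target are illustrative, and the list-of-dicts shape mirrors the existing context_window_fallbacks convention noted in the inline comments elsewhere in this diff:

    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "gpt-3.5-turbo",
             "litellm_params": {"model": "gpt-3.5-turbo"}},
            {"model_name": "claude-3-haiku",
             "litellm_params": {"model": "claude-3-haiku-20240307"}},
        ],
        # If gpt-3.5-turbo raises ContentPolicyViolationError,
        # retry the request on the claude-3-haiku model group.
        content_policy_fallbacks=[{"gpt-3.5-turbo": ["claude-3-haiku"]}],
    )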
@ -576,6 +584,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _acompletion( async def _acompletion(
@ -1097,6 +1113,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _aimage_generation(self, prompt: str, model: str, **kwargs): async def _aimage_generation(self, prompt: str, model: str, **kwargs):
@ -1221,6 +1245,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _atranscription(self, file: BinaryIO, model: str, **kwargs): async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
@ -1387,6 +1419,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def amoderation(self, model: str, input: str, **kwargs): async def amoderation(self, model: str, input: str, **kwargs):
@ -1402,6 +1442,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _amoderation(self, model: str, input: str, **kwargs): async def _amoderation(self, model: str, input: str, **kwargs):
@ -1546,6 +1594,14 @@ class Router:
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _atext_completion(self, model: str, prompt: str, **kwargs): async def _atext_completion(self, model: str, prompt: str, **kwargs):
@ -1741,6 +1797,14 @@ class Router:
response = await self.async_function_with_fallbacks(**kwargs) response = await self.async_function_with_fallbacks(**kwargs)
return response return response
except Exception as e: except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e raise e
async def _aembedding(self, input: Union[str, List], model: str, **kwargs): async def _aembedding(self, input: Union[str, List], model: str, **kwargs):
@ -1998,6 +2062,9 @@ class Router:
context_window_fallbacks = kwargs.get( context_window_fallbacks = kwargs.get(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.get(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
if mock_testing_fallbacks is not None and mock_testing_fallbacks == True: if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
raise Exception( raise Exception(
@ -2016,7 +2083,10 @@ class Router:
if ( if (
hasattr(e, "status_code") hasattr(e, "status_code")
and e.status_code == 400 # type: ignore and e.status_code == 400 # type: ignore
and not isinstance(e, litellm.ContextWindowExceededError) and not (
isinstance(e, litellm.ContextWindowExceededError)
or isinstance(e, litellm.ContentPolicyViolationError)
)
): # don't retry a malformed request ): # don't retry a malformed request
raise e raise e
if ( if (
@ -2034,6 +2104,39 @@ class Router:
if fallback_model_group is None: if fallback_model_group is None:
raise original_exception raise original_exception
for mg in fallback_model_group:
"""
Iterate through the model groups and try calling that deployment
"""
try:
kwargs["model"] = mg
kwargs.setdefault("metadata", {}).update(
{"model_group": mg}
) # update model_group used, if fallbacks are done
response = await self.async_function_with_retries(
*args, **kwargs
)
verbose_router_logger.info(
"Successful fallback b/w models."
)
return response
except Exception as e:
pass
elif (
isinstance(e, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
fallback_model_group = None
for (
item
) in content_policy_fallbacks: # [{"gpt-3.5-turbo": ["gpt-4"]}]
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
if fallback_model_group is None:
raise original_exception
for mg in fallback_model_group: for mg in fallback_model_group:
""" """
Iterate through the model groups and try calling that deployment Iterate through the model groups and try calling that deployment
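For intuition, the fallback-group lookup shared by the context-window and content-policy branches scans the configured list of single-key dicts for the failing model group; a standalone sketch with illustrative data:

    content_policy_fallbacks = [
        {"gpt-3.5-turbo": ["claude-3-haiku"]},   # illustrative mapping
        {"gpt-4": ["gpt-4o"]},
    ]

    def lookup_fallback_group(model_group, fallbacks):
        # Mirrors the loop above: first matching key wins, otherwise None.
        for item in fallbacks:
            if list(item.keys())[0] == model_group:
                return item[model_group]
        return None

    assert lookup_fallback_group("gpt-4", content_policy_fallbacks) == ["gpt-4o"]
    assert lookup_fallback_group("unknown-group", content_policy_fallbacks) is None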
@ -2114,6 +2217,9 @@ class Router:
context_window_fallbacks = kwargs.pop( context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.pop(
"content_policy_fallbacks", self.content_policy_fallbacks
)
num_retries = kwargs.pop("num_retries") num_retries = kwargs.pop("num_retries")
@ -2141,6 +2247,7 @@ class Router:
healthy_deployments=_healthy_deployments, healthy_deployments=_healthy_deployments,
context_window_fallbacks=context_window_fallbacks, context_window_fallbacks=context_window_fallbacks,
regular_fallbacks=fallbacks, regular_fallbacks=fallbacks,
content_policy_fallbacks=content_policy_fallbacks,
) )
# decides how long to sleep before retry # decides how long to sleep before retry
@ -2206,10 +2313,12 @@ class Router:
error: Exception, error: Exception,
healthy_deployments: Optional[List] = None, healthy_deployments: Optional[List] = None,
context_window_fallbacks: Optional[List] = None, context_window_fallbacks: Optional[List] = None,
content_policy_fallbacks: Optional[List] = None,
regular_fallbacks: Optional[List] = None, regular_fallbacks: Optional[List] = None,
): ):
""" """
1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None 1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None
2. raise an exception for RateLimitError if 2. raise an exception for RateLimitError if
- there are no fallbacks - there are no fallbacks
@ -2219,13 +2328,19 @@ class Router:
if healthy_deployments is not None and isinstance(healthy_deployments, list): if healthy_deployments is not None and isinstance(healthy_deployments, list):
_num_healthy_deployments = len(healthy_deployments) _num_healthy_deployments = len(healthy_deployments)
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error
if ( if (
isinstance(error, litellm.ContextWindowExceededError) isinstance(error, litellm.ContextWindowExceededError)
and context_window_fallbacks is not None and context_window_fallbacks is not None
): ):
raise error raise error
if (
isinstance(error, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
raise error
# Error we should only retry if there are other deployments # Error we should only retry if there are other deployments
if isinstance(error, openai.RateLimitError): if isinstance(error, openai.RateLimitError):
if ( if (
@ -2256,6 +2371,9 @@ class Router:
context_window_fallbacks = kwargs.get( context_window_fallbacks = kwargs.get(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.get(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
if mock_testing_fallbacks is not None and mock_testing_fallbacks == True: if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
raise Exception( raise Exception(
@ -2271,7 +2389,10 @@ class Router:
if ( if (
hasattr(e, "status_code") hasattr(e, "status_code")
and e.status_code == 400 # type: ignore and e.status_code == 400 # type: ignore
and not isinstance(e, litellm.ContextWindowExceededError) and not (
isinstance(e, litellm.ContextWindowExceededError)
or isinstance(e, litellm.ContentPolicyViolationError)
)
): # don't retry a malformed request ): # don't retry a malformed request
raise e raise e
@ -2294,6 +2415,37 @@ class Router:
if fallback_model_group is None: if fallback_model_group is None:
raise original_exception raise original_exception
for mg in fallback_model_group:
"""
Iterate through the model groups and try calling that deployment
"""
try:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
kwargs["model"] = mg
kwargs.setdefault("metadata", {}).update(
{"model_group": mg}
) # update model_group used, if fallbacks are done
response = self.function_with_fallbacks(*args, **kwargs)
return response
except Exception as e:
pass
elif (
isinstance(e, litellm.ContentPolicyViolationError)
and content_policy_fallbacks is not None
):
fallback_model_group = None
for (
item
) in content_policy_fallbacks: # [{"gpt-3.5-turbo": ["gpt-4"]}]
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
if fallback_model_group is None:
raise original_exception
for mg in fallback_model_group: for mg in fallback_model_group:
""" """
Iterate through the model groups and try calling that deployment Iterate through the model groups and try calling that deployment
@ -2400,6 +2552,9 @@ class Router:
context_window_fallbacks = kwargs.pop( context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks "context_window_fallbacks", self.context_window_fallbacks
) )
content_policy_fallbacks = kwargs.pop(
"content_policy_fallbacks", self.content_policy_fallbacks
)
try: try:
# if the function call is successful, no exception will be raised and we'll break out of the loop # if the function call is successful, no exception will be raised and we'll break out of the loop
@ -2419,6 +2574,7 @@ class Router:
healthy_deployments=_healthy_deployments, healthy_deployments=_healthy_deployments,
context_window_fallbacks=context_window_fallbacks, context_window_fallbacks=context_window_fallbacks,
regular_fallbacks=fallbacks, regular_fallbacks=fallbacks,
content_policy_fallbacks=content_policy_fallbacks,
) )
# decides how long to sleep before retry # decides how long to sleep before retry
@ -4570,6 +4726,8 @@ class Router:
default_webhook_url=router_alerting_config.webhook_url, default_webhook_url=router_alerting_config.webhook_url,
) )
self.slack_alerting_logger = _slack_alerting_logger
litellm.callbacks.append(_slack_alerting_logger) litellm.callbacks.append(_slack_alerting_logger)
litellm.success_callback.append( litellm.success_callback.append(
_slack_alerting_logger.response_taking_too_long_callback _slack_alerting_logger.response_taking_too_long_callback

View file

@ -0,0 +1,53 @@
import asyncio
import traceback
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from litellm.router import Router as _Router
LitellmRouter = _Router
else:
LitellmRouter = Any
async def send_llm_exception_alert(
litellm_router_instance: LitellmRouter,
request_kwargs: dict,
error_traceback_str: str,
original_exception,
):
"""
Sends a Slack / MS Teams alert for the LLM API call failure.
Parameters:
    litellm_router_instance (_Router): The LitellmRouter instance.
    request_kwargs (dict): The kwargs of the failed LLM API call.
    error_traceback_str (str): The formatted traceback string for the failure.
    original_exception (Any): The original exception that occurred.
Returns:
None
"""
if litellm_router_instance is None:
return
if not hasattr(litellm_router_instance, "slack_alerting_logger"):
return
if litellm_router_instance.slack_alerting_logger is None:
return
if "proxy_server_request" in request_kwargs:
    # Do not alert on requests that came through the litellm proxy server;
    # the proxy is already instrumented to alert on LLM API call failures
return
litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
exception_str = str(original_exception)
if litellm_debug_info is not None:
exception_str += litellm_debug_info
exception_str += f"\n\n{error_traceback_str[:2000]}"
await litellm_router_instance.slack_alerting_logger.send_alert(
message=f"LLM API call failed: `{exception_str}`",
level="High",
alert_type="llm_exceptions",
)
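A hedged sketch of how the router methods wire this helper in; the wrapper function is illustrative, while the create_task pattern mirrors the calls added throughout router.py in this diff:

    import asyncio
    import traceback

    async def call_with_exception_alerting(router, **kwargs):
        try:
            return await router.async_function_with_fallbacks(**kwargs)
        except Exception as e:
            # Fire-and-forget: alerting must never delay or mask the original failure.
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=router,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e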

View file

@ -25,6 +25,9 @@ import pytest
from litellm.router import AlertingConfig, Router from litellm.router import AlertingConfig, Router
from litellm.proxy._types import CallInfo from litellm.proxy._types import CallInfo
from openai import APIError from openai import APIError
from litellm.router import AlertingConfig
import litellm
import os
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -159,6 +162,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
mock_send_alert.assert_awaited_once() mock_send_alert.assert_awaited_once()
@pytest.mark.asyncio
async def test_alerting_metadata(slack_alerting):
"""
Test that alerting_metadata is propagated correctly for the response-taking-too-long alert
"""
start_time = datetime.now()
end_time = start_time + timedelta(seconds=301)
kwargs = {
"model": "test_model",
"messages": "test_messages",
"litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
}
with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
## RESPONSE TAKING TOO LONG
await slack_alerting.response_taking_too_long_callback(
kwargs, None, start_time, end_time
)
mock_send_alert.assert_awaited_once()
assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]
# Test for budget crossed # Test for budget crossed
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_budget_alerts_crossed(slack_alerting): async def test_budget_alerts_crossed(slack_alerting):
@ -204,7 +230,9 @@ async def test_send_alert(slack_alerting):
slack_alerting.async_http_handler, "post", new=AsyncMock() slack_alerting.async_http_handler, "post", new=AsyncMock()
) as mock_post: ) as mock_post:
mock_post.return_value.status_code = 200 mock_post.return_value.status_code = 200
await slack_alerting.send_alert("Test message", "Low", "budget_alerts") await slack_alerting.send_alert(
"Test message", "Low", "budget_alerts", alerting_metadata={}
)
mock_post.assert_awaited_once() mock_post.assert_awaited_once()
@ -263,7 +291,7 @@ async def test_daily_reports_completion(slack_alerting):
await asyncio.sleep(3) await asyncio.sleep(3)
response_val = await slack_alerting.send_daily_reports(router=router) response_val = await slack_alerting.send_daily_reports(router=router)
assert response_val == True assert response_val is True
mock_send_alert.assert_awaited_once() mock_send_alert.assert_awaited_once()
@ -288,7 +316,7 @@ async def test_daily_reports_completion(slack_alerting):
await asyncio.sleep(3) await asyncio.sleep(3)
response_val = await slack_alerting.send_daily_reports(router=router) response_val = await slack_alerting.send_daily_reports(router=router)
assert response_val == True assert response_val is True
mock_send_alert.assert_awaited() mock_send_alert.assert_awaited()
@ -743,3 +771,37 @@ async def test_region_outage_alerting_called(
mock_send_alert.assert_called_once() mock_send_alert.assert_called_once()
else: else:
mock_send_alert.assert_not_called() mock_send_alert.assert_not_called()
@pytest.mark.asyncio
@pytest.mark.skip(reason="test only needs to run locally")
async def test_alerting():
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "bad_key",
},
}
],
debug_level="DEBUG",
set_verbose=True,
alerting_config=AlertingConfig(
alerting_threshold=10, # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
webhook_url=os.getenv(
"SLACK_WEBHOOK_URL"
), # webhook you want to send alerts to
),
)
try:
await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
except:
pass
finally:
await asyncio.sleep(3)

View file

@ -29,19 +29,22 @@ import pytest, logging, asyncio
import litellm, asyncio import litellm, asyncio
from litellm.proxy.proxy_server import ( from litellm.proxy.proxy_server import (
new_user, new_user,
generate_key_fn,
user_api_key_auth, user_api_key_auth,
user_update, user_update,
user_info,
block_user,
)
from litellm.proxy.management_endpoints.key_management_endpoints import (
delete_key_fn, delete_key_fn,
info_key_fn, info_key_fn,
update_key_fn, update_key_fn,
generate_key_fn, generate_key_fn,
generate_key_helper_fn, generate_key_helper_fn,
)
from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
spend_user_fn, spend_user_fn,
spend_key_fn, spend_key_fn,
view_spend_logs, view_spend_logs,
user_info,
block_user,
) )
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger

Some files were not shown because too many files have changed in this diff