Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge branch 'BerriAI:main' into main

Commit: 1b644fa0fc

142 changed files with 19096 additions and 13214 deletions
.github/workflows/ghcr_deploy.yml (vendored, 22 changes)
@@ -25,6 +25,11 @@ jobs:
    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
    steps:
+     -
+       name: Checkout
+       uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}
      -
        name: Set up QEMU
        uses: docker/setup-qemu-action@v3

@@ -41,12 +46,14 @@ jobs:
        name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
      -
        name: Build and push litellm-database image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: Dockerfile.database
          tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}

@@ -54,6 +61,7 @@ jobs:
        name: Build and push litellm-spend-logs image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: ./litellm-js/spend-logs/Dockerfile
          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}

@@ -68,6 +76,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}
      # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -92,7 +102,7 @@ jobs:
      - name: Build and push Docker image
        uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
          labels: ${{ steps.meta.outputs.labels }}

@@ -106,6 +116,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -128,7 +140,7 @@ jobs:
      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          file: Dockerfile.database
          push: true
          tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}

@@ -143,6 +155,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1

@@ -165,7 +179,7 @@ jobs:
      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
-         context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}}
+         context: .
          file: ./litellm-js/spend-logs/Dockerfile
          push: true
          tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}

@@ -176,6 +190,8 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+       with:
+         ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
@@ -1,4 +1,19 @@
repos:
  - repo: local
    hooks:
      # - id: mypy
      #   name: mypy
      #   entry: python3 -m mypy --ignore-missing-imports
      #   language: system
      #   types: [python]
      #   files: ^litellm/
      - id: isort
        name: isort
        entry: isort
        language: system
        types: [python]
        files: litellm/.*\.py
        exclude: ^litellm/__init__.py$
  - repo: https://github.com/psf/black
    rev: 24.2.0
    hooks:

@@ -16,11 +31,10 @@ repos:
        name: Check if files match
        entry: python3 ci_cd/check_files_match.py
        language: system
  - repo: local
    hooks:
      - id: mypy
        name: mypy
        entry: python3 -m mypy --ignore-missing-imports
        language: system
        types: [python]
        files: ^litellm/
      # - id: check-file-length
      #   name: Check file length
      #   entry: python check_file_length.py
      #   args: ["10000"] # set your desired maximum number of lines
      #   language: python
      #   files: litellm/.*\.py
      #   exclude: ^litellm/tests/
check_file_length.py (new file, 28 lines)
@@ -0,0 +1,28 @@
import sys


def check_file_length(max_lines, filenames):
    bad_files = []
    for filename in filenames:
        with open(filename, "r") as file:
            lines = file.readlines()
            if len(lines) > max_lines:
                bad_files.append((filename, len(lines)))
    return bad_files


if __name__ == "__main__":
    max_lines = int(sys.argv[1])
    filenames = sys.argv[2:]

    bad_files = check_file_length(max_lines, filenames)
    if bad_files:
        bad_files.sort(
            key=lambda x: x[1], reverse=True
        )  # Sort files by length in descending order
        for filename, length in bad_files:
            print(f"{filename}: {length} lines")

        sys.exit(1)
    else:
        sys.exit(0)
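For reference, a minimal sketch of exercising the new helper directly, rather than through the (currently commented-out) pre-commit hook; the file names below are placeholders and the script is assumed to be importable from the repo root:

```python
from check_file_length import check_file_length

# same check the hook would run as `python check_file_length.py 10000 <files>`
offenders = check_file_length(10000, ["litellm/utils.py", "litellm/main.py"])
for path, n_lines in offenders:
    print(f"{path} has {n_lines} lines (limit: 10000)")
```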
@@ -162,7 +162,7 @@ def completion(

- `function`: *object* - Required.

- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type: "function", "function": {"name": "my_function"}} forces the model to call that function.
- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function.

- `none` is the default when no functions are present. `auto` is the default if functions are present.
@@ -1,90 +0,0 @@
import Image from '@theme/IdealImage';
import QueryParamReader from '../../src/components/queryParamReader.js'

# [Beta] Monitor Logs in Production

:::note

This is in beta. Expect frequent updates, as we improve based on your feedback.

:::

LiteLLM provides an integration to let you monitor logs in production.

👉 Jump to our sample LiteLLM Dashboard: https://admin.litellm.ai/

<Image img={require('../../img/alt_dashboard.png')} alt="Dashboard" />

## Debug your first logs
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_OpenAI.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

### 1. Get your LiteLLM Token

Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet with your unique token

<Image img={require('../../img/hosted_debugger_usage_page.png')} alt="Usage" />

### 2. Set up your environment

**Add it to your .env**

```python
import os

os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
```

**Turn on LiteLLM Client**

```python
import litellm
litellm.client = True
```

### 3. Make a normal `completion()` call

```python
import litellm
from litellm import completion
import os

# set env variables
os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token
os.environ["OPENAI_API_KEY"] = "openai key"

litellm.use_client = True # enable logging dashboard
messages = [{ "content": "Hello, how are you?","role": "user"}]

# openai call
response = completion(model="gpt-3.5-turbo", messages=messages)
```

Your `completion()` call will print a link to your session dashboard (https://admin.litellm.ai/<your_unique_token>)

In the above case it would be: [`admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb`](https://admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb)

Click on your personal dashboard link. Here's how you can find it 👇

<Image img={require('../../img/dash_output.png')} alt="Dashboard" />

[👋 Tell us if you need better privacy controls](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version?month=2023-08)

### 4. Review request log

Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.

Ah! So we can see that this request was made to **Baseten** (see litellm_params > custom_llm_provider) for a model with ID - **7qQNLDB** (see model). The message sent was - `"Hey, how's it going?"` and the response received was - `"As an AI language model, I don't have feelings or emotions, but I can assist you with your queries. How can I assist you today?"`

<Image img={require('../../img/dashboard_log.png')} alt="Dashboard Log Row" />

:::info

🎉 Congratulations! You've successfully debugged your first log!

:::
@@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
+
# Promptlayer Tutorial

Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# OpenAI (Text Completion)

LiteLLM supports OpenAI text completion models
@@ -208,7 +208,7 @@ print(response)

Instead of using the `custom_llm_provider` arg to specify which provider you're using (e.g. together ai), you can just pass the provider name as part of the model name, and LiteLLM will parse it out.

-Expected format: <custom_llm_provider>/<model_name>
+Expected format: `<custom_llm_provider>/<model_name>`

e.g. completion(model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct", ...)
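To illustrate the provider-prefix routing described in this hunk, a minimal sketch (the environment variable name follows LiteLLM's Together AI docs; the key value is a placeholder):

```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "your-together-ai-key"  # placeholder

# the "together_ai/" prefix tells LiteLLM which provider to route to,
# so no custom_llm_provider arg is needed
response = completion(
    model="together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response)
```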
@@ -32,41 +32,33 @@ Get a slack webhook url from https://api.slack.com/messaging/webhooks

You can also use Discord Webhooks, see [here](#using-discord-webhooks)

### Step 2: Update config.yaml

- Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
- Just for testing purposes, let's save a bad key to our proxy.

Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.

```bash
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
```

### Step 2: Setup Proxy

```yaml
model_list:
    model_name: "azure-model"
    litellm_params:
        model: "azure/gpt-35-turbo"
        api_key: "my-bad-key" # 👈 bad key

general_settings:
    alerting: ["slack"]
    alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+

environment_variables:
    SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/<>/<>/<>"
    SLACK_DAILY_REPORT_FREQUENCY: "86400" # 24 hours; Optional: defaults to 12 hours
```

### Step 3: Start proxy

```bash
$ litellm --config /path/to/config.yaml
```

## Testing Alerting is Setup Correctly

Make a GET request to `/health/services`, expect to see a test slack alert in your provided webhook slack channel

### Step 3: Test it!

```shell
curl -X GET 'http://localhost:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

```bash
curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
-H 'Authorization: Bearer sk-1234'
```

## Advanced - Redacting Messages from Alerts

@@ -84,7 +76,34 @@ litellm_settings:
```

## Advanced - Add Metadata to alerts

Add alerting metadata to proxy calls for debugging.

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [],
    extra_body={
        "metadata": {
            "alerting_metadata": {
                "hello": "world"
            }
        }
    }
)
```

**Expected Response**

<Image img={require('../../img/alerting_metadata.png')}/>

## Advanced - Opting into specific alert types
|
|||
#### 3. Connect to the EC2 Instance and deploy litellm on the EC2 container
|
||||
From the EC2 console, connect to the instance created by the stack (e.g., using SSH).
|
||||
|
||||
Run the following command, replacing <database_url> with the value you copied in step 2
|
||||
Run the following command, replacing `<database_url>` with the value you copied in step 2
|
||||
|
||||
```shell
|
||||
docker run --name litellm-proxy \
|
||||
|
|
|
@@ -5,6 +5,7 @@ import Image from '@theme/IdealImage';

Send an Email to your users when:
- A Proxy API Key is created for them
- Their API Key crosses its Budget
+- All Team members of a LiteLLM Team -> when the team crosses its budget

<Image img={require('../../img/email_notifs.png')} style={{ width: '500px' }}/>
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# Model Management
Add new models + Get model info without restarting proxy.
@@ -1,3 +1,5 @@
+import Image from '@theme/IdealImage';
+
# LiteLLM Proxy Performance

### Throughput - 30% Increase
@@ -409,6 +409,28 @@ print(response)

</Tabs>

### Content Policy Fallbacks

Fallback across providers (e.g. from Azure OpenAI to Anthropic) if you hit content policy violation errors.

```yaml
model_list:
  - model_name: gpt-3.5-turbo-small
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"

  - model_name: claude-opus
    litellm_params:
      model: claude-3-opus-20240229
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}]
```

### EU-Region Filtering (Pre-Call Checks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
@@ -46,7 +46,7 @@ Possible values for `budget_duration`

| `budget_duration="1m"` | every 1 min |
| `budget_duration="1h"` | every 1 hour |
| `budget_duration="1d"` | every 1 day |
-| `budget_duration="1mo"` | start of every month |
+| `budget_duration="1mo"` | every 1 month |
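For example, creating a team whose budget resets at the `1mo` cadence above might look like the following sketch (the proxy URL and master key are placeholders):

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/team/new",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 100, "budget_duration": "1mo"},  # budget resets every 1 month
)
print(resp.json())
```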
### 2. Create a key for the `team`
@@ -413,6 +413,52 @@ curl 'http://0.0.0.0:4000/key/generate' \

</TabItem>
</Tabs>

### Reset Budgets

Reset budgets across keys/internal users/teams/customers

`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

<Tabs>
<TabItem value="users" label="Internal Users">

```bash
curl 'http://0.0.0.0:4000/user/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="keys" label="Keys">

```bash
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```

</TabItem>
<TabItem value="teams" label="Teams">

```bash
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "max_budget": 10,
    "budget_duration": "10s" # 👈 KEY CHANGE
}'
```
</TabItem>
</Tabs>
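The curl calls above translate one-to-one into plain HTTP; a minimal Python sketch of the key variant (URL and master key are placeholders, and note the corrected `"10s"` quoting):

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 10, "budget_duration": "10s"},  # budget resets every 10 seconds
)
print(resp.json()["key"])
```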
## Set Rate Limits

You can set:
@@ -790,84 +790,204 @@ If the error is a context window exceeded error, fall back to a larger model group

Fallbacks are done in-order - ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.

You can also set `default_fallbacks`, in case a specific model group is misconfigured / bad.

There are 3 types of fallbacks:
- `content_policy_fallbacks`: For litellm.ContentPolicyViolationError - LiteLLM maps content policy violation errors across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8495C27-L8495C54)
- `context_window_fallbacks`: For litellm.ContextWindowExceededErrors - LiteLLM maps context window error messages across providers [**See Code**](https://github.com/BerriAI/litellm/blob/89a43c872a1e3084519fb9de159bf52f5447c6c4/litellm/utils.py#L8469)
- `fallbacks`: For all remaining errors - e.g. litellm.RateLimitError

**Content Policy Violation Fallback**

Key change:

```python
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

Removed (the old combined fallback example):

```python
from litellm import Router

model_list = [
    { # list of model deployments
        "model_name": "azure/gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    { # list of model deployments
        "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    {
        "model_name": "azure/gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-functioncalling",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE")
        },
        "tpm": 240000,
        "rpm": 1800
    },
    {
        "model_name": "gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
        "tpm": 1000000,
        "rpm": 9000
    },
    {
        "model_name": "gpt-3.5-turbo-16k", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-16k",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
        "tpm": 1000000,
        "rpm": 9000
    }
]

router = Router(model_list=model_list,
                fallbacks=[{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}],
                default_fallbacks=["gpt-3.5-turbo-16k"],
                context_window_fallbacks=[{"azure/gpt-3.5-turbo-context-fallback": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
                set_verbose=True)

user_message = "Hello, whats the weather in San Francisco??"
messages = [{"content": user_message, "role": "user"}]

# normal fallback call
response = router.completion(model="azure/gpt-3.5-turbo", messages=messages)

# context window fallback call
response = router.completion(model="azure/gpt-3.5-turbo-context-fallback", messages=messages)

print(f"response: {response}")
```

Added (the new mocked example):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("content filtering policy"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # fallbacks=[..], # [OPTIONAL]
    # context_window_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

**Context Window Exceeded Fallback**

Key change:

```python
context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("prompt is too long"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # fallbacks=[..], # [OPTIONAL]
    # content_policy_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    context_window_fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

**Regular Fallbacks**

Key change:

```python
fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "claude-2",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": Exception("this is a rate limit error"),
            },
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {
                "model": "claude-2",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    fallbacks=[{"claude-2": ["my-fallback-model"]}], # 👈 KEY CHANGE
    # context_window_fallbacks=[..], # [OPTIONAL]
    # content_policy_fallbacks=[..], # [OPTIONAL]
)

response = router.completion(
    model="claude-2",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

In your proxy config.yaml just add this line 👇

```yaml
router_settings:
    fallbacks=[{"claude-2": ["my-fallback-model"]}]
```

Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

</TabItem>
</Tabs>

### Caching
@@ -23,9 +23,13 @@ https://api.together.xyz/playground/chat?model=togethercomputer%2Fllama-2-70b-chat

model_name = "together_ai/togethercomputer/llama-2-70b-chat"
response = completion(model=model_name, messages=messages)
print(response)
```

```
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': "\n\nI'm not able to provide real-time weather information. However, I can suggest"}}], 'created': 1691629657.9288375, 'model': 'togethercomputer/llama-2-70b-chat', 'usage': {'prompt_tokens': 9, 'completion_tokens': 17, 'total_tokens': 26}}
```

LiteLLM handles the prompt formatting for Together AI's Llama2 models as well, converting your message to the
docs/my-website/img/alerting_metadata.png (new binary file, 207 KiB; not shown)
docs/my-website/package-lock.json (generated, 8607 changes; diff suppressed because it is too large)
@@ -14,17 +14,17 @@
    "write-heading-ids": "docusaurus write-heading-ids"
  },
  "dependencies": {
-   "@docusaurus/core": "2.4.1",
-   "@docusaurus/plugin-google-gtag": "^2.4.1",
-   "@docusaurus/plugin-ideal-image": "^2.4.1",
-   "@docusaurus/preset-classic": "2.4.1",
+   "@docusaurus/core": "3.1.1",
+   "@docusaurus/plugin-google-gtag": "3.1.1",
+   "@docusaurus/plugin-ideal-image": "3.1.1",
+   "@docusaurus/preset-classic": "3.1.1",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
    "docusaurus": "^1.14.7",
    "docusaurus-lunr-search": "^2.4.1",
    "prism-react-renderer": "^1.3.5",
-   "react": "^17.0.2",
-   "react-dom": "^17.0.2",
+   "react": "^18.1.0",
+   "react-dom": "^18.1.0",
    "sharp": "^0.32.6",
    "uuid": "^9.0.1"
  },
File diff suppressed because it is too large.
@@ -122,236 +122,6 @@ async def ui_get_spend_by_tags(
    return {"spend_per_tag": ui_tags}


async def view_spend_logs_from_clickhouse(
    api_key=None, user_id=None, request_id=None, start_date=None, end_date=None
):
    verbose_logger.debug("Reading logs from Clickhouse")
    import os

    # if user has setup clickhouse
    # TODO: Move this to be a helper function
    # querying clickhouse for this data
    import clickhouse_connect
    from datetime import datetime

    port = os.getenv("CLICKHOUSE_PORT")
    if port is not None and isinstance(port, str):
        port = int(port)

    client = clickhouse_connect.get_client(
        host=os.getenv("CLICKHOUSE_HOST"),
        port=port,
        username=os.getenv("CLICKHOUSE_USERNAME", ""),
        password=os.getenv("CLICKHOUSE_PASSWORD", ""),
    )
    if (
        start_date is not None
        and isinstance(start_date, str)
        and end_date is not None
        and isinstance(end_date, str)
    ):
        # Convert the date strings to datetime objects
        start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

        # get top spend per day
        response = client.query(
            f"""
            SELECT
                toDate(startTime) AS day,
                sum(spend) AS total_spend
            FROM
                spend_logs
            WHERE
                toDate(startTime) BETWEEN toDate('2024-02-01') AND toDate('2024-02-29')
            GROUP BY
                day
            ORDER BY
                total_spend
            """
        )

        results = []
        result_rows = list(response.result_rows)
        for response in result_rows:
            current_row = {}
            current_row["users"] = {"example": 0.0}
            current_row["models"] = {}

            current_row["spend"] = float(response[1])
            current_row["startTime"] = str(response[0])

            # stubbed api_key
            current_row[""] = 0.0  # type: ignore
            results.append(current_row)

        return results
    else:
        # check if spend logs exist, if it does then return last 10 logs, sorted in descending order of startTime
        response = client.query(
            """
            SELECT
                *
            FROM
                default.spend_logs
            ORDER BY
                startTime DESC
            LIMIT
                10
            """
        )

        # get size of spend logs
        num_rows = client.query("SELECT count(*) FROM default.spend_logs")
        num_rows = num_rows.result_rows[0][0]

        # safely access num_rows.result_rows[0][0]
        if num_rows is None:
            num_rows = 0

        raw_rows = list(response.result_rows)
        response_data = {
            "logs": raw_rows,
            "log_count": num_rows,
        }
        return response_data


def _create_clickhouse_material_views(client=None, table_names=[]):
    # Create Materialized Views if they don't exist
    # Materialized Views send new inserted rows to the aggregate tables

    verbose_logger.debug("Clickhouse: Creating Materialized Views")
    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )


def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
    # Basic Logging works without this - this is only used for low latency reporting apis
    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    # Create Aggregate Tables if they don't exist
    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return


def _forecast_daily_cost(data: list):
    import requests  # type: ignore
    from datetime import datetime, timedelta
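One note on the removed tables: because `DailySpend` is stored as an `AggregateFunction(sum, Float64)` state (populated by the `sumState(...)` materialized views), reading it back requires finalizing the state with `sumMerge`. A minimal, illustrative sketch of that read path, using the same `clickhouse_connect` client setup as the removed code (host and port are placeholders):

```python
import os
import clickhouse_connect

client = clickhouse_connect.get_client(host=os.getenv("CLICKHOUSE_HOST"), port=8123)

# AggregateFunction states must be finalized with sumMerge when queried
rows = client.query(
    "SELECT day, sumMerge(DailySpend) AS daily_spend FROM daily_aggregated_spend GROUP BY day"
).result_rows
```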
@@ -240,6 +240,7 @@ num_retries: Optional[int] = None # per model endpoint
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
+content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 0
num_retries_per_request: Optional[int] = (
    None  # for the request overall (incl. fallbacks + model retries)

@@ -714,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"]

from .timeout import timeout
from .cost_calculator import completion_cost
+from litellm.litellm_core_utils.litellm_logging import Logging
from .utils import (
    client,
    exception_type,

@@ -722,12 +724,10 @@ from .utils import (
    token_counter,
    create_pretrained_tokenizer,
    create_tokenizer,
-   cost_per_token,
    supports_function_calling,
    supports_parallel_function_calling,
    supports_vision,
    get_litellm_params,
-   Logging,
    acreate,
    get_model_list,
    get_max_tokens,

@@ -747,9 +747,10 @@ from .utils import (
    get_first_chars_messages,
    ModelResponse,
    ImageResponse,
-   ImageObject,
    get_provider_fields,
)

+from .types.utils import ImageObject
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig

@@ -826,4 +827,4 @@ from .router import Router
from .assistants.main import *
from .batches.main import *
from .scheduler import *
-from .cost_calculator import response_cost_calculator
+from .cost_calculator import response_cost_calculator, cost_per_token
@@ -3,10 +3,17 @@ from logging import Formatter
import traceback

set_verbose = False

+if set_verbose is True:
+    logging.warning(
+        "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+    )
json_logs = bool(os.getenv("JSON_LOGS", False))
# Create a handler for the logger (you may need to adapt this based on your needs)
+log_level = os.getenv("LITELLM_LOG", "ERROR")
+numeric_level: str = getattr(logging, log_level.upper())
handler = logging.StreamHandler()
-handler.setLevel(logging.DEBUG)
+handler.setLevel(numeric_level)


class JsonFormatter(Formatter):
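Per the new handling above, log verbosity now comes from the `LITELLM_LOG` env var rather than `litellm.set_verbose`; a minimal sketch of opting into debug logs:

```python
import os

os.environ["LITELLM_LOG"] = "DEBUG"  # read at import time by litellm._logging

import litellm  # the stream handler level now resolves to logging.DEBUG
```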
@@ -1,6 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Tuple
import litellm._logging
from litellm.utils import (
    ModelResponse,

@@ -9,7 +9,6 @@ from litellm.utils import (
    TranscriptionResponse,
    TextCompletionResponse,
    CallTypes,
-   cost_per_token,
    print_verbose,
    CostPerToken,
    token_counter,

@@ -18,6 +17,224 @@ import litellm
from litellm import verbose_logger


def _cost_per_token_custom_pricing_helper(
    prompt_tokens=0,
    completion_tokens=0,
    response_time_ms=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Optional[Tuple[float, float]]:
    """Internal helper function for calculating cost, if custom pricing given"""
    if custom_cost_per_token is None and custom_cost_per_second is None:
        return None

    if custom_cost_per_token is not None:
        input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
        output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
        return input_cost, output_cost
    elif custom_cost_per_second is not None:
        output_cost = custom_cost_per_second * response_time_ms / 1000  # type: ignore
        return 0, output_cost

    return None


def cost_per_token(
    model: str = "",
    prompt_tokens=0,
    completion_tokens=0,
    response_time_ms=None,
    custom_llm_provider=None,
    region_name=None,
    ### CUSTOM PRICING ###
    custom_cost_per_token: Optional[CostPerToken] = None,
    custom_cost_per_second: Optional[float] = None,
) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Parameters:
        model (str): The name of the model to use. Default is ""
        prompt_tokens (int): The number of tokens in the prompt.
        completion_tokens (int): The number of tokens in the completion.
        response_time (float): The amount of time, in milliseconds, it took the call to complete.
        custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.

    Returns:
        tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
    """
    if model is None:
        raise Exception("Invalid arg. Model cannot be none.")
    ## CUSTOM PRICING ##
    response_cost = _cost_per_token_custom_pricing_helper(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        response_time_ms=response_time_ms,
        custom_cost_per_second=custom_cost_per_second,
        custom_cost_per_token=custom_cost_per_token,
    )
    if response_cost is not None:
        return response_cost[0], response_cost[1]

    # given
    prompt_tokens_cost_usd_dollar: float = 0
    completion_tokens_cost_usd_dollar: float = 0
    model_cost_ref = litellm.model_cost
    model_with_provider = model
    if custom_llm_provider is not None:
        model_with_provider = custom_llm_provider + "/" + model
        if region_name is not None:
            model_with_provider_and_region = (
                f"{custom_llm_provider}/{region_name}/{model}"
            )
            if (
                model_with_provider_and_region in model_cost_ref
            ):  # use region based pricing, if it's available
                model_with_provider = model_with_provider_and_region

    model_without_prefix = model
    model_parts = model.split("/")
    if len(model_parts) > 1:
        model_without_prefix = model_parts[1]
    else:
        model_without_prefix = model
    """
    Code block that formats model to lookup in litellm.model_cost
    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
    Option2. model = "openai/gpt-4"       - model = provider/model
    Option3. model = "anthropic.claude-3" - model = model
    """
    if (
        model_with_provider in model_cost_ref
    ):  # Option 2. use model with provider, model = "openai/gpt-4"
        model = model_with_provider
    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
        model = model
    elif (
        model_without_prefix in model_cost_ref
    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
        model = model_without_prefix

    # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
    print_verbose(f"Looking up model={model} in model_cost_map")
    if model in model_cost_ref:
        print_verbose(f"Success: model={model} in model_cost_map")
        print_verbose(
            f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
        )
        if (
            model_cost_ref[model].get("input_cost_per_token", None) is not None
            and model_cost_ref[model].get("output_cost_per_token", None) is not None
        ):
            ## COST PER TOKEN ##
            prompt_tokens_cost_usd_dollar = (
                model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
            )
            completion_tokens_cost_usd_dollar = (
                model_cost_ref[model]["output_cost_per_token"] * completion_tokens
            )
        elif (
            model_cost_ref[model].get("output_cost_per_second", None) is not None
            and response_time_ms is not None
        ):
            print_verbose(
                f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
            )
            ## COST PER SECOND ##
            prompt_tokens_cost_usd_dollar = 0
            completion_tokens_cost_usd_dollar = (
                model_cost_ref[model]["output_cost_per_second"]
                * response_time_ms
                / 1000
            )
        elif (
            model_cost_ref[model].get("input_cost_per_second", None) is not None
            and response_time_ms is not None
        ):
            print_verbose(
                f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
            )
            ## COST PER SECOND ##
            prompt_tokens_cost_usd_dollar = (
                model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
            )
            completion_tokens_cost_usd_dollar = 0.0
        print_verbose(
            f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:gpt-3.5-turbo" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:davinci-002" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:davinci-002:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif "ft:babbage-002" in model:
        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
        # fuzzy match ft:babbage-002:abcd-id-cool-litellm
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
            * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif model in litellm.azure_llms:
        verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
        model = litellm.azure_llms[model]
        verbose_logger.debug(
            f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
        )
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
        )
        verbose_logger.debug(
            f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    elif model in litellm.azure_embedding_models:
        verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
        model = litellm.azure_embedding_models[model]
        prompt_tokens_cost_usd_dollar = (
            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
        )
        completion_tokens_cost_usd_dollar = (
            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    else:
        # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
        raise litellm.exceptions.NotFoundError(  # type: ignore
            message=error_str,
            model=model,
            llm_provider="",
        )


# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name) -> str:
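A quick worked example of the custom-pricing path added above (values are illustrative; `cost_per_token` is re-exported from the top-level `litellm` package by this commit):

```python
from litellm import cost_per_token

# custom per-token pricing short-circuits the model_cost lookup via the helper above
custom = {"input_cost_per_token": 1e-06, "output_cost_per_token": 2e-06}
prompt_cost, completion_cost = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=1000,
    completion_tokens=500,
    custom_cost_per_token=custom,
)
# prompt_cost == 0.001 (1000 * 1e-06); completion_cost == 0.001 (500 * 2e-06)
```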
@@ -324,7 +324,7 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
        message,
        model,
        llm_provider,
-       response: httpx.Response,
+       response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
    ):
        self.status_code = 400

@@ -332,11 +332,13 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
        self.model = model
        self.llm_provider = llm_provider
        self.litellm_debug_info = litellm_debug_info
+       request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+       self.response = response or httpx.Response(status_code=400, request=request)
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
-           response=response,
+           response=self.response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

@@ -407,7 +409,7 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
        message,
        model,
        llm_provider,
-       response: httpx.Response,
+       response: Optional[httpx.Response] = None,
        litellm_debug_info: Optional[str] = None,
    ):
        self.status_code = 400

@@ -415,11 +417,13 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
        self.model = model
        self.llm_provider = llm_provider
        self.litellm_debug_info = litellm_debug_info
+       request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+       self.response = response or httpx.Response(status_code=500, request=request)
        super().__init__(
            message=self.message,
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
-           response=response,
+           response=self.response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs
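With `response` now optional in these constructors, the errors can be raised without hand-building an httpx response; a minimal sketch:

```python
import litellm

# a default httpx.Response (status 400) is synthesized when none is passed
err = litellm.ContextWindowExceededError(
    message="prompt is too long",
    model="claude-2",
    llm_provider="anthropic",
)
print(err.response.status_code)  # 400
```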
@@ -226,14 +226,6 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

-   # RUN Enterprise Clickhouse Setup
-   # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
-   from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
-   from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
-
-   _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
-   _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes
litellm/integrations/email_alerting.py (new file, 136 lines)
@@ -0,0 +1,136 @@
"""
Functions for sending Email Alerts
"""

import os
from typing import Optional, List
from litellm.proxy._types import WebhookEvent
import asyncio
from litellm._logging import verbose_logger, verbose_proxy_logger

# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"


async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
    verbose_logger.debug(
        "Email Alerting: Getting all team members for team_id=%s", team_id
    )
    if team_id is None:
        return []
    from litellm.proxy.proxy_server import premium_user, prisma_client

    if prisma_client is None:
        raise Exception("Not connected to DB!")

    team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={
            "team_id": team_id,
        }
    )

    if team_row is None:
        return []

    _team_members = team_row.members_with_roles
    verbose_logger.debug(
        "Email Alerting: Got team members for team_id=%s Team Members: %s",
        team_id,
        _team_members,
    )
    _team_member_user_ids: List[str] = []
    for member in _team_members:
        if member and isinstance(member, dict) and member.get("user_id") is not None:
            _team_member_user_ids.append(member.get("user_id"))

    sql_query = """
    SELECT user_email
    FROM "LiteLLM_UserTable"
    WHERE user_id = ANY($1::TEXT[]);
    """

    _result = await prisma_client.db.query_raw(sql_query, _team_member_user_ids)

    verbose_logger.debug("Email Alerting: Got all Emails for team, emails=%s", _result)

    if _result is None:
        return []

    emails = []
    for user in _result:
        if user and isinstance(user, dict) and user.get("user_email", None) is not None:
            emails.append(user.get("user_email"))
    return emails


async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
    """
    Send an Email Alert to All Team Members when the Team Budget is crossed
    Returns -> True if sent, False if not.
    """
    from litellm.proxy.utils import send_email

    from litellm.proxy.proxy_server import premium_user, prisma_client

    _team_id = webhook_event.team_id
    team_alias = webhook_event.team_alias
    verbose_logger.debug(
        "Email Alerting: Sending Team Budget Alert for team=%s", team_alias
    )

    email_logo_url = os.getenv("SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None))
    email_support_contact = os.getenv("EMAIL_SUPPORT_CONTACT", None)

    # await self._check_if_using_premium_email_feature(
    #     premium_user, email_logo_url, email_support_contact
    # )

    if email_logo_url is None:
        email_logo_url = LITELLM_LOGO_URL
    if email_support_contact is None:
        email_support_contact = LITELLM_SUPPORT_CONTACT
    recipient_emails = await get_all_team_member_emails(_team_id)
    recipient_emails_str: str = ",".join(recipient_emails)
    verbose_logger.debug(
        "Email Alerting: Sending team budget alert to %s", recipient_emails_str
    )

    event_name = webhook_event.event_message
    max_budget = webhook_event.max_budget
    email_html_content = "Alert from LiteLLM Server"

    if recipient_emails_str is None:
        verbose_proxy_logger.error(
            "Email Alerting: Trying to send email alert to no recipient, got recipient_emails=%s",
            recipient_emails_str,
        )

    email_html_content = f"""
    <img src="{email_logo_url}" alt="LiteLLM Logo" width="150" height="50" /> <br/><br/><br/>

    Budget Crossed for Team <b> {team_alias} </b> <br/> <br/>

    Your Team's LLM API usage has crossed its <b> budget of ${max_budget} </b>, current spend is <b>${webhook_event.spend}</b><br /> <br />

    API requests will be rejected until either (a) you increase your budget or (b) your budget gets reset <br /> <br />

    If you have any questions, please send an email to {email_support_contact} <br /> <br />

    Best, <br />
    The LiteLLM team <br />
    """

    email_event = {
        "to": recipient_emails_str,
        "subject": f"LiteLLM {event_name} for Team {team_alias}",
        "html": email_html_content,
    }

    await send_email(
        receiver_email=email_event["to"],
        subject=email_event["subject"],
        html=email_event["html"],
    )

    return False
|
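The two helpers above are the whole email path for team budget alerts: resolve the team's member user_ids from the team table, map them to emails with one raw SQL query, then render and send a single HTML mail to the joined recipient list. A minimal sketch of how the proxy side might drive this, assuming an event built by the budget checks (every field value below is illustrative, not taken from this diff):

    import asyncio
    from litellm.proxy._types import WebhookEvent
    from litellm.integrations.email_alerting import send_team_budget_alert

    # Hypothetical payload; in the proxy these events are constructed by the
    # budget-tracking hooks, not by hand.
    event = WebhookEvent(
        event="budget_crossed",
        event_group="team",
        event_message="Budget Alert",
        token="sk-...",
        spend=105.0,
        max_budget=100.0,
        user_id=None,
        team_id="team-1234",
        team_alias="ml-platform",
    )

    asyncio.run(send_team_budget_alert(webhook_event=event))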
@@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
            messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
        request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
        slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+       alerting_metadata: dict = {}
        if time_difference_float > self.alerting_threshold:
            # add deployment latencies to alert
            if (
@@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
                and "litellm_params" in kwargs
                and "metadata" in kwargs["litellm_params"]
            ):
-               _metadata = kwargs["litellm_params"]["metadata"]
+               _metadata: dict = kwargs["litellm_params"]["metadata"]
                request_info = litellm.utils._add_key_name_and_team_to_alert(
                    request_info=request_info, metadata=_metadata
                )
@@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
                request_info += (
                    f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
                )
+
+           if "alerting_metadata" in _metadata:
+               alerting_metadata = _metadata["alerting_metadata"]
            await self.send_alert(
                message=slow_message + request_info,
                level="Low",
                alert_type="llm_too_slow",
+               alerting_metadata=alerting_metadata,
            )

    async def async_update_daily_reports(
@@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
        message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"

        # send alert
-       await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+       await self.send_alert(
+           message=message,
+           level="Low",
+           alert_type="daily_reports",
+           alerting_metadata={},
+       )

        return True
@@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
        await asyncio.sleep(
            self.alerting_threshold
        )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
+       alerting_metadata: dict = {}
        if (
            request_data is not None
            and request_data.get("litellm_status", "") != "success"
@@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
            ):
                # In hanging requests, sometimes the deployment has not yet been passed to `request_data`
                # in that case we fall back to the api base set in the request metadata
-               _metadata = request_data["metadata"]
+               _metadata: dict = request_data["metadata"]
                _api_base = _metadata.get("api_base", "")

                request_info = litellm.utils._add_key_name_and_team_to_alert(
@@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):

                if _api_base is None:
                    _api_base = ""
+
+               if "alerting_metadata" in _metadata:
+                   alerting_metadata = _metadata["alerting_metadata"]
                request_info += f"\nAPI Base: `{_api_base}`"
            # only alert hanging responses if they have not been marked as success
            alerting_message = (
@@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
                message=alerting_message + request_info,
                level="Medium",
                alert_type="llm_requests_hanging",
+               alerting_metadata=alerting_metadata,
            )

    async def failed_tracking_alert(self, error_message: str):
@@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
        result = await _cache.async_get_cache(key=_cache_key)
        if result is None:
            await self.send_alert(
-               message=message, level="High", alert_type="budget_alerts"
+               message=message,
+               level="High",
+               alert_type="budget_alerts",
+               alerting_metadata={},
            )
            await _cache.async_set_cache(
                key=_cache_key,
@@ -680,7 +698,7 @@ class SlackAlerting(CustomLogger):
            return
        if "budget_alerts" not in self.alert_types:
            return
-       _id: str = "default_id"  # used for caching
+       _id: Optional[str] = "default_id"  # used for caching
        user_info_json = user_info.model_dump(exclude_none=True)
        for k, v in user_info_json.items():
            user_info_str = "\n{}: {}\n".format(k, v)
@@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
                    level="High",
                    alert_type="budget_alerts",
                    user_info=webhook_event,
+                   alerting_metadata={},
                )
                await _cache.async_set_cache(
                    key=_cache_key,
@@ -769,7 +788,13 @@ class SlackAlerting(CustomLogger):
        response_cost: Optional[float],
        max_budget: Optional[float],
    ):
-       if end_user_id is not None and token is not None and response_cost is not None:
+       if (
+           self.alerting is not None
+           and "webhook" in self.alerting
+           and end_user_id is not None
+           and token is not None
+           and response_cost is not None
+       ):
            # log customer spend
            event = WebhookEvent(
                spend=response_cost,
@@ -941,7 +966,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send minor alert
                        await self.send_alert(
-                           message=msg, level="Medium", alert_type="outage_alerts"
+                           message=msg,
+                           level="Medium",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["minor_alert_sent"] = True
@@ -963,7 +991,12 @@ class SlackAlerting(CustomLogger):
                        )

                        # send major alert
-                       await self.send_alert(message=msg, level="High", alert_type="outage_alerts")
+                       await self.send_alert(
+                           message=msg,
+                           level="High",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
+                       )
                        # set to true
                        outage_value["major_alert_sent"] = True
@@ -1062,7 +1095,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send minor alert
                        await self.send_alert(
-                           message=msg, level="Medium", alert_type="outage_alerts"
+                           message=msg,
+                           level="Medium",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["minor_alert_sent"] = True
@@ -1081,7 +1117,10 @@ class SlackAlerting(CustomLogger):
                        )
                        # send major alert
                        await self.send_alert(
-                           message=msg, level="High", alert_type="outage_alerts"
+                           message=msg,
+                           level="High",
+                           alert_type="outage_alerts",
+                           alerting_metadata={},
                        )
                        # set to true
                        outage_value["major_alert_sent"] = True
@@ -1143,7 +1182,10 @@ Model Info:
        """

        alert_val = self.send_alert(
-           message=message, level="Low", alert_type="new_model_added"
+           message=message,
+           level="Low",
+           alert_type="new_model_added",
+           alerting_metadata={},
        )

        if alert_val is not None and asyncio.iscoroutine(alert_val):
@@ -1159,6 +1201,9 @@ Model Info:
        Currently only implemented for budget alerts

        Returns -> True if sent, False if not.
+
+       Raises Exception
+           - if WEBHOOK_URL is not set
        """

        webhook_url = os.getenv("WEBHOOK_URL", None)
@@ -1297,7 +1342,9 @@ Model Info:
            verbose_proxy_logger.error("Error sending email alert %s", str(e))
            return False

-   async def send_email_alert_using_smtp(self, webhook_event: WebhookEvent) -> bool:
+   async def send_email_alert_using_smtp(
+       self, webhook_event: WebhookEvent, alert_type: str
+   ) -> bool:
        """
        Sends structured Email alert to an SMTP server
@@ -1306,7 +1353,6 @@ Model Info:
        Returns -> True if sent, False if not.
        """
        from litellm.proxy.utils import send_email
-
        from litellm.proxy.proxy_server import premium_user, prisma_client

        email_logo_url = os.getenv(
@@ -1360,6 +1406,10 @@ Model Info:
            subject=email_event["subject"],
            html=email_event["html"],
        )
+       if webhook_event.event_group == "team":
+           from litellm.integrations.email_alerting import send_team_budget_alert
+
+           await send_team_budget_alert(webhook_event=webhook_event)

        return False
@@ -1368,6 +1418,7 @@ Model Info:
        message: str,
        level: Literal["Low", "Medium", "High"],
        alert_type: Literal[AlertType],
+       alerting_metadata: dict,
        user_info: Optional[WebhookEvent] = None,
        **kwargs,
    ):
@@ -1401,7 +1452,9 @@ Model Info:
            and user_info is not None
        ):
            # only send budget alerts over Email
-           await self.send_email_alert_using_smtp(webhook_event=user_info)
+           await self.send_email_alert_using_smtp(
+               webhook_event=user_info, alert_type=alert_type
+           )

        if "slack" not in self.alerting:
            return
@@ -1425,6 +1478,9 @@ Model Info:
        if kwargs:
            for key, value in kwargs.items():
                formatted_message += f"\n\n{key}: `{value}`\n\n"
+       if alerting_metadata:
+           for key, value in alerting_metadata.items():
+               formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
        if _proxy_base_url is not None:
            formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
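The common thread in the hunks above is a new required alerting_metadata argument on send_alert, filled from the request's litellm metadata so callers can tag alerts with their own context. A sketch of how a caller could attach such metadata so it shows up in the Slack message (the metadata key follows the hunks above; the values are illustrative):

    import litellm

    # "alerting_metadata" inside the request metadata is what the slow-response
    # and hanging-request alerters copy into the alert they send.
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"alerting_metadata": {"environment": "staging"}},
    )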
@@ -1440,7 +1496,7 @@ Model Info:
        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)

        if slack_webhook_url is None:
-           raise Exception("Missing SLACK_WEBHOOK_URL from environment")
+           raise ValueError("Missing SLACK_WEBHOOK_URL from environment")
        payload = {"text": formatted_message}
        headers = {"Content-type": "application/json"}
@@ -1453,7 +1509,7 @@ Model Info:
            pass
        else:
            verbose_proxy_logger.debug(
-               "Error sending slack alert. Error=", response.text
+               "Error sending slack alert. Error={}".format(response.text)
            )

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -1622,6 +1678,7 @@ Model Info:
                    message=_weekly_spend_message,
                    level="Low",
                    alert_type="spend_reports",
+                   alerting_metadata={},
                )
            except Exception as e:
                verbose_proxy_logger.error("Error sending weekly spend report", e)
@@ -1673,6 +1730,7 @@ Model Info:
                    message=_spend_message,
                    level="Low",
                    alert_type="spend_reports",
+                   alerting_metadata={},
                )
            except Exception as e:
                verbose_proxy_logger.error("Error sending weekly spend report", e)
41
litellm/litellm_core_utils/core_helpers.py
Normal file
@@ -0,0 +1,41 @@
# What is this?
## Helper utilities for the model response objects


def map_finish_reason(
    finish_reason: str,
):  # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
    # anthropic mapping
    if finish_reason == "stop_sequence":
        return "stop"
    # cohere mapping - https://docs.cohere.com/reference/generate
    elif finish_reason == "COMPLETE":
        return "stop"
    elif finish_reason == "MAX_TOKENS":  # cohere + vertex ai
        return "length"
    elif finish_reason == "ERROR_TOXIC":
        return "content_filter"
    elif (
        finish_reason == "ERROR"
    ):  # openai currently doesn't support an 'error' finish reason
        return "stop"
    # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
    elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
        return "stop"
    elif (
        finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
    ):  # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
        return "stop"
    elif finish_reason == "SAFETY":  # vertex ai
        return "content_filter"
    elif finish_reason == "STOP":  # vertex ai
        return "stop"
    elif finish_reason == "end_turn" or finish_reason == "stop_sequence":  # anthropic
        return "stop"
    elif finish_reason == "max_tokens":  # anthropic
        return "length"
    elif finish_reason == "tool_use":  # anthropic
        return "tool_calls"
    elif finish_reason == "content_filtered":
        return "content_filter"
    return finish_reason
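Since map_finish_reason is now the single place where provider-specific stop reasons are normalized, its behavior is easy to spot-check; all of the expected values below come straight from the branches above:

    from litellm.litellm_core_utils.core_helpers import map_finish_reason

    assert map_finish_reason("MAX_TOKENS") == "length"        # cohere / vertex ai
    assert map_finish_reason("end_turn") == "stop"            # anthropic
    assert map_finish_reason("tool_use") == "tool_calls"      # anthropic
    assert map_finish_reason("SAFETY") == "content_filter"    # vertex ai
    assert map_finish_reason("stop") == "stop"                # unmapped values pass through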
1780
litellm/litellm_core_utils/litellm_logging.py
Normal file
File diff suppressed because it is too large
@@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any
import litellm

if TYPE_CHECKING:
-   from litellm.utils import Logging as _LiteLLMLoggingObject
+   from litellm.litellm_core_utils.litellm_logging import (
+       Logging as _LiteLLMLoggingObject,
+   )

    LiteLLMLoggingObject = _LiteLLMLoggingObject
else:
@@ -30,7 +32,6 @@ def redact_message_input_output_from_logging(
    if litellm.turn_off_message_logging is not True:
        return result

-   _result = copy.deepcopy(result)
    # remove messages, prompts, input, response from logging
    litellm_logging_obj.model_call_details["messages"] = [
        {"role": "user", "content": "redacted-by-litellm"}
@@ -53,8 +54,10 @@ def redact_message_input_output_from_logging(
        elif isinstance(choice, litellm.utils.StreamingChoices):
            choice.delta.content = "redacted-by-litellm"
    else:
-       if _result is not None:
-           if isinstance(_result, litellm.ModelResponse):
+       if result is not None:
+           if isinstance(result, litellm.ModelResponse):
+               # only deep copy litellm.ModelResponse
+               _result = copy.deepcopy(result)
                if hasattr(_result, "choices") and _result.choices is not None:
                    for choice in _result.choices:
                        if isinstance(choice, litellm.Choices):
@@ -63,3 +66,6 @@ def redact_message_input_output_from_logging(
                            choice.delta.content = "redacted-by-litellm"

        return _result
+
+   # by default return result
+   return result
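The redaction change above is a small but real behavior fix: the deep copy now happens only once the result is known to be a litellm.ModelResponse, and everything else falls through to the new trailing `return result` instead of being copied up front. For reference, this whole code path is gated by one flag (a sketch; the logging object itself is wired up internally by litellm):

    import litellm

    # When set, redact_message_input_output_from_logging replaces message and
    # response content with "redacted-by-litellm" in what is handed to logging
    # integrations; the response returned to the caller is unchanged.
    litellm.turn_off_message_logging = True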
@@ -5,10 +5,16 @@ import requests, copy  # type: ignore
import time
from functools import partial
from typing import Callable, Optional, List, Union
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+import litellm.litellm_core_utils
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM
import httpx  # type: ignore
from litellm.types.llms.anthropic import AnthropicMessagesToolChoice
@@ -171,7 +177,7 @@ async def make_call(
    logging_obj,
):
    if client is None:
-       client = AsyncHTTPHandler()  # Create a new client if none provided
+       client = _get_async_httpx_client()  # Create a new client if none provided

    response = await client.post(api_base, headers=headers, data=data, stream=True)
@@ -201,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -316,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -463,9 +469,7 @@ class AnthropicChatCompletion(BaseLLM):
        logger_fn=None,
        headers={},
    ) -> Union[ModelResponse, CustomStreamWrapper]:
-       async_handler = AsyncHTTPHandler(
-           timeout=httpx.Timeout(timeout=600.0, connect=5.0)
-       )
+       async_handler = _get_async_httpx_client()
        response = await async_handler.post(api_base, headers=headers, json=data)
        if stream and _is_function_call:
            return self.process_streaming_response(
@@ -2,7 +2,7 @@
import litellm
import httpx, requests
from typing import Optional, Union
-from litellm.utils import Logging
+from litellm.litellm_core_utils.litellm_logging import Logging


class BaseLLM:
@@ -5,12 +5,10 @@ import time, uuid
from typing import Callable, Optional, Any, Union, List
import litellm
from litellm.utils import (
-    ModelResponse,
    get_secret,
-    Usage,
-    ImageResponse,
-    map_finish_reason,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.types.utils import ImageResponse, ModelResponse, Usage
from .prompt_templates.factory import (
    prompt_factory,
    custom_prompt,
@@ -633,7 +631,11 @@ def init_bedrock_client(
    config = boto3.session.Config()

    ### CHECK STS ###
-   if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
+   if (
+       aws_web_identity_token is not None
+       and aws_role_name is not None
+       and aws_session_name is not None
+   ):
        oidc_token = get_secret(aws_web_identity_token)

        if oidc_token is None:
@@ -642,9 +644,7 @@ def init_bedrock_client(
                status_code=401,
            )

-       sts_client = boto3.client(
-           "sts"
-       )
+       sts_client = boto3.client("sts")

        # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
@@ -22,13 +22,12 @@ from typing import (
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
-    Message,
-    Choices,
    get_secret,
-    Logging,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.types.utils import Message, Choices
import litellm, uuid
from .prompt_templates.factory import (
    prompt_factory,
@@ -41,7 +40,12 @@ from .prompt_templates.factory import (
    _bedrock_converse_messages_pt,
    _bedrock_tools_pt,
)
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_async_httpx_client,
+    _get_httpx_client,
+)
from .base import BaseLLM
import httpx  # type: ignore
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
@@ -57,6 +61,7 @@ from litellm.caching import DualCache

iam_cache = DualCache()

+
class AmazonCohereChatConfig:
    """
    Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
@@ -167,7 +172,7 @@ async def make_call(
    logging_obj,
):
    if client is None:
-       client = AsyncHTTPHandler()  # Create a new client if none provided
+       client = _get_async_httpx_client()  # Create a new client if none provided

    response = await client.post(api_base, headers=headers, data=data, stream=True)
@@ -198,7 +203,7 @@ def make_sync_call(
    logging_obj,
):
    if client is None:
-       client = HTTPHandler()  # Create a new client if none provided
+       client = _get_httpx_client()  # Create a new client if none provided

    response = client.post(api_base, headers=headers, data=data, stream=True)
@@ -327,13 +332,19 @@ class BedrockLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-           iam_creds_cache_key = json.dumps({
+       if (
+           aws_web_identity_token is not None
+           and aws_role_name is not None
+           and aws_session_name is not None
+       ):
+           iam_creds_cache_key = json.dumps(
+               {
                    "aws_web_identity_token": aws_web_identity_token,
                    "aws_role_name": aws_role_name,
                    "aws_session_name": aws_session_name,
                    "aws_region_name": aws_region_name,
-           })
+               }
+           )

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
@@ -348,7 +359,7 @@ class BedrockLLM(BaseLLM):
                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
-                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
+                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@@ -362,12 +373,18 @@ class BedrockLLM(BaseLLM):

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
-                   "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
+                   "aws_secret_access_key": sts_response["Credentials"][
+                       "SecretAccessKey"
+                   ],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-               iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+               iam_cache.set_cache(
+                   key=iam_creds_cache_key,
+                   value=json.dumps(iam_creds_dict),
+                   ttl=3600 - 60,
+               )

            session = boto3.Session(**iam_creds_dict)
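These STS hunks are mostly line-length reformats, but the logic they touch is worth spelling out: assumed-role credentials are cached in iam_cache, keyed by a JSON dump of the identity parameters, with a TTL of 3600 - 60 seconds, one minute shorter than the default one-hour STS session, so a cached entry always expires before AWS invalidates it. The same pattern in isolation (names and the cache structure here are illustrative, not the litellm internals):

    import json
    import time

    _cache: dict = {}  # cache key -> (expiry_epoch, credentials)

    def get_cached_creds(params: dict, fetch, session_ttl: int = 3600):
        # Key on the exact identity parameters; refresh 60s before STS expiry.
        key = json.dumps(params, sort_keys=True)
        hit = _cache.get(key)
        if hit is not None and hit[0] > time.time():
            return hit[1]
        creds = fetch(params)  # e.g. sts.assume_role_with_web_identity(...)
        _cache[key] = (time.time() + session_ttl - 60, creds)
        return creds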
@@ -976,7 +993,7 @@ class BedrockLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           self.client = HTTPHandler(**_params)  # type: ignore
+           self.client = _get_httpx_client(_params)  # type: ignore
        else:
            self.client = client
        if (stream is not None and stream == True) and provider != "ai21":
@@ -1058,7 +1075,7 @@ class BedrockLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = AsyncHTTPHandler(**_params)  # type: ignore
+           client = _get_async_httpx_client(_params)  # type: ignore
        else:
            client = client  # type: ignore
@@ -1433,13 +1450,19 @@ class BedrockConverseLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
-           iam_creds_cache_key = json.dumps({
+       if (
+           aws_web_identity_token is not None
+           and aws_role_name is not None
+           and aws_session_name is not None
+       ):
+           iam_creds_cache_key = json.dumps(
+               {
                    "aws_web_identity_token": aws_web_identity_token,
                    "aws_role_name": aws_role_name,
                    "aws_session_name": aws_session_name,
                    "aws_region_name": aws_region_name,
-           })
+               }
+           )

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
@@ -1454,7 +1477,7 @@ class BedrockConverseLLM(BaseLLM):
                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
-                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
+                   endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com",
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
@@ -1468,12 +1491,18 @@ class BedrockConverseLLM(BaseLLM):

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
-                   "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
+                   "aws_secret_access_key": sts_response["Credentials"][
+                       "SecretAccessKey"
+                   ],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-               iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
+               iam_cache.set_cache(
+                   key=iam_creds_cache_key,
+                   value=json.dumps(iam_creds_dict),
+                   ttl=3600 - 60,
+               )

            session = boto3.Session(**iam_creds_dict)
@@ -1575,7 +1604,7 @@ class BedrockConverseLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = AsyncHTTPHandler(**_params)  # type: ignore
+           client = _get_async_httpx_client(_params)  # type: ignore
        else:
            client = client  # type: ignore
@@ -1847,7 +1876,7 @@ class BedrockConverseLLM(BaseLLM):
            if isinstance(timeout, float) or isinstance(timeout, int):
                timeout = httpx.Timeout(timeout)
            _params["timeout"] = timeout
-           client = HTTPHandler(**_params)  # type: ignore
+           client = _get_httpx_client(_params)  # type: ignore
        else:
            client = client
        try:
@@ -219,3 +219,60 @@ class HTTPHandler:
            self.close()
        except Exception:
            pass


def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
    """
    Retrieves the async HTTP client from the cache
    If not present, creates a new client

    Caches the new client and returns it.
    """
    _params_key_name = ""
    if params is not None:
        for key, value in params.items():
            try:
                _params_key_name += f"{key}_{value}"
            except Exception:
                pass

    _cache_key_name = "async_httpx_client" + _params_key_name
    if _cache_key_name in litellm.in_memory_llm_clients_cache:
        return litellm.in_memory_llm_clients_cache[_cache_key_name]

    if params is not None:
        _new_client = AsyncHTTPHandler(**params)
    else:
        _new_client = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
        )
    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
    return _new_client


def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
    """
    Retrieves the HTTP client from the cache
    If not present, creates a new client

    Caches the new client and returns it.
    """
    _params_key_name = ""
    if params is not None:
        for key, value in params.items():
            try:
                _params_key_name += f"{key}_{value}"
            except Exception:
                pass

    _cache_key_name = "httpx_client" + _params_key_name
    if _cache_key_name in litellm.in_memory_llm_clients_cache:
        return litellm.in_memory_llm_clients_cache[_cache_key_name]

    if params is not None:
        _new_client = HTTPHandler(**params)
    else:
        _new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))

    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
    return _new_client
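The point of the two new helpers is connection reuse: identical params produce the same cache key, so repeated calls return the same pooled handler instead of constructing a fresh httpx client (and TCP/TLS pool) per request. Expected caching behavior, assuming the functions exactly as added above:

    import httpx
    from litellm.llms.custom_httpx.http_handler import _get_httpx_client

    a = _get_httpx_client()
    b = _get_httpx_client()
    assert a is b  # same (empty) params -> same cache key -> same client

    c = _get_httpx_client({"timeout": httpx.Timeout(30.0)})
    assert c is not a  # different params -> separate cached client

One caveat visible in the code: the cache key is built by string-concatenating key_value pairs, so two different params dicts that stringify the same way would share a client, and the same params in a different insertion order would create a duplicate client; for the small fixed set of internal callers here that is harmless.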
@@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
    EmbeddingResponse,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union
from litellm.utils import (
    ModelResponse,
    Usage,
-    map_finish_reason,
    CustomStreamWrapper,
    Message,
    Choices,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -4,7 +4,6 @@ from enum import Enum
import requests, copy  # type: ignore
import time
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@@ -5,7 +5,8 @@ import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore
from litellm.types.llms.vertex_ai import *
@@ -6,7 +6,8 @@ from enum import Enum
import requests, copy  # type: ignore
import time, uuid
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .prompt_templates.factory import (
@@ -8,7 +8,10 @@ from enum import Enum
import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Any, Tuple
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -320,7 +323,7 @@ class VertexLLM(BaseLLM):
        model: str,
        response: httpx.Response,
        model_response: ModelResponse,
-       logging_obj: litellm.utils.Logging,
+       logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@@ -368,7 +368,9 @@ async def acompletion(
        return response
    except Exception as e:
        verbose_logger.error(
-           "litellm.acompletion(): Exception occured - {}".format(str(e))
+           "litellm.acompletion(): Exception occured - {}\n{}".format(
+               str(e), traceback.format_exc()
+           )
        )
        verbose_logger.debug(traceback.format_exc())
        custom_llm_provider = custom_llm_provider or "openai"
@@ -399,6 +401,7 @@ def mock_completion(
    stream: Optional[bool] = False,
    mock_response: Union[str, Exception] = "This is a mock request",
    logging=None,
+   custom_llm_provider=None,
    **kwargs,
):
    """
@@ -436,7 +439,7 @@ def mock_completion(
        raise litellm.APIError(
            status_code=getattr(mock_response, "status_code", 500),  # type: ignore
            message=getattr(mock_response, "text", str(mock_response)),
-           llm_provider=getattr(mock_response, "llm_provider", "openai"),  # type: ignore
+           llm_provider=getattr(mock_response, "llm_provider", custom_llm_provider or "openai"),  # type: ignore
            model=model,  # type: ignore
            request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
        )
@@ -905,6 +908,7 @@ def completion(
            logging=logging,
            acompletion=acompletion,
            mock_delay=kwargs.get("mock_delay", None),
+           custom_llm_provider=custom_llm_provider,
        )
    if custom_llm_provider == "azure":
        # azure configs
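The mock_completion change threads custom_llm_provider through so a mocked exception is raised under the provider the caller actually targeted instead of always "openai". A sketch of how that surfaces (assuming, per the hunk above, that an Exception-typed mock_response is re-raised as litellm.APIError):

    import litellm

    try:
        litellm.mock_completion(
            model="bedrock/anthropic.claude-3-sonnet",
            messages=[{"role": "user", "content": "hi"}],
            mock_response=Exception("simulated outage"),
            custom_llm_provider="bedrock",
        )
    except litellm.APIError as e:
        assert e.llm_provider == "bedrock"  # previously defaulted to "openai"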
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(64090);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);
|
|
@ -1 +0,0 @@
|
|||
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(3827);n(64090);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(57437),a=l(2265),r=l(47907),n=l(2179),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,505,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
|||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/63f65dbb14efd996.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
||||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/b65d5698d1a1958d.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-6f7793f21bbb2fbe.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-626098dc8320c801.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b65d5698d1a1958d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S9_6IC27HNWjJtr-LNaAO\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""]
|
||||
3:I[68101,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-626098dc8320c801.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""]
|
||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S9_6IC27HNWjJtr-LNaAO",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/b65d5698d1a1958d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -79,8 +79,8 @@ litellm_settings:
|
|||
failure_callback: ["langfuse"]
|
||||
cache: true
|
||||
|
||||
# general_settings:
|
||||
# alerting: ["email"]
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
# key_management_system: "aws_kms"
|
||||
# key_management_settings:
|
||||
# hosted_keys: ["LITELLM_MASTER_KEY"]
|
||||
|
|
|
@ -1358,10 +1358,11 @@ class CallInfo(LiteLLMBase):
|
|||
|
||||
spend: float
|
||||
max_budget: Optional[float] = None
|
||||
token: str = Field(description="Hashed value of that key")
|
||||
token: Optional[str] = Field(default=None, description="Hashed value of that key")
|
||||
customer_id: Optional[str] = None
|
||||
user_id: Optional[str] = None
|
||||
team_id: Optional[str] = None
|
||||
team_alias: Optional[str] = None
|
||||
user_email: Optional[str] = None
|
||||
key_alias: Optional[str] = None
|
||||
projected_exceeded_date: Optional[str] = None
|
||||
|
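The hunk above loosens `CallInfo.token` from a required field to `Optional[str]` with a `None` default, so alert payloads that have no associated key still validate. A minimal sketch of the effect, using a trimmed stand-in model (the real class lives in `litellm/proxy/_types.py`; `LiteLLMBase` is assumed to be a pydantic `BaseModel`, as the `Field(...)` usage suggests):

```python
from typing import Optional

from pydantic import BaseModel, Field


class CallInfo(BaseModel):
    """Trimmed stand-in for litellm.proxy._types.CallInfo."""

    spend: float
    max_budget: Optional[float] = None
    token: Optional[str] = Field(default=None, description="Hashed value of that key")
    user_id: Optional[str] = None


# Before this change, omitting `token` raised a pydantic ValidationError.
# Now a key-less event (e.g. a user-level budget alert) constructs cleanly:
info = CallInfo(spend=12.5, max_budget=100.0, user_id="user-123")
print(info.token)  # None
```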
@ -1574,3 +1575,44 @@ class ManagementEndpointLoggingPayload(LiteLLMBase):
|
|||
exception: Optional[Any] = None
|
||||
start_time: Optional[datetime] = None
|
||||
end_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class ProxyException(Exception):
|
||||
# NOTE: DO NOT MODIFY THIS
|
||||
# This is used to map exactly to OPENAI Exceptions
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
type: str,
|
||||
param: Optional[str],
|
||||
code: Optional[int],
|
||||
):
|
||||
self.message = message
|
||||
self.type = type
|
||||
self.param = param
|
||||
self.code = code
|
||||
|
||||
# rules for proxyExceptions
|
||||
# Litellm router.py returns "No healthy deployment available" when there are no deployments available
|
||||
# Should map to 429 errors https://github.com/BerriAI/litellm/issues/2487
|
||||
if (
|
||||
"No healthy deployment available" in self.message
|
||||
or "No deployments available" in self.message
|
||||
):
|
||||
self.code = 429
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Converts the ProxyException instance to a dictionary."""
|
||||
return {
|
||||
"message": self.message,
|
||||
"type": self.type,
|
||||
"param": self.param,
|
||||
"code": self.code,
|
||||
}
|
||||
|
||||
|
||||
class CommonProxyErrors(enum.Enum):
|
||||
db_not_connected_error = "DB not connected"
|
||||
no_llm_router = "No models configured on proxy"
|
||||
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
|
||||
not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
|
||||
|
|
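Because `__init__` rewrites `self.code` whenever the router's "no deployments" message appears, callers get the 429 remapping for free. A quick sketch, assuming the class is importable from `litellm.proxy._types` (as the health-endpoint imports later in this diff do):

```python
from litellm.proxy._types import ProxyException

# Router "no deployments" failures are remapped to HTTP 429 so clients
# back off and retry instead of treating them as hard 5xx errors.
exc = ProxyException(
    message="No deployments available for selected model",
    type="router_error",
    param=None,
    code=500,
)
print(exc.code)       # 429 - overridden by the constructor
print(exc.to_dict())  # {"message": ..., "type": "router_error", "param": None, "code": 429}
```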
1237
litellm/proxy/auth/user_api_key_auth.py
Normal file
File diff suppressed because it is too large
194
litellm/proxy/caching_routes.py
Normal file
|
@ -0,0 +1,194 @@
|
|||
from typing import Optional
|
||||
from fastapi import Depends, Request, APIRouter
|
||||
from fastapi import HTTPException
|
||||
import copy
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/cache",
|
||||
tags=["caching"],
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/ping",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_ping():
|
||||
"""
|
||||
Endpoint for checking if cache can be pinged
|
||||
"""
|
||||
try:
|
||||
litellm_cache_params = {}
|
||||
specific_cache_params = {}
|
||||
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
|
||||
for k, v in vars(litellm.cache).items():
|
||||
try:
|
||||
if k == "cache":
|
||||
continue
|
||||
litellm_cache_params[k] = str(copy.deepcopy(v))
|
||||
except Exception:
|
||||
litellm_cache_params[k] = "<unable to copy or convert>"
|
||||
for k, v in vars(litellm.cache.cache).items():
|
||||
try:
|
||||
specific_cache_params[k] = str(v)
|
||||
except Exception:
|
||||
specific_cache_params[k] = "<unable to copy or convert>"
|
||||
if litellm.cache.type == "redis":
|
||||
# ping the redis cache
|
||||
ping_response = await litellm.cache.ping()
|
||||
verbose_proxy_logger.debug(
|
||||
"/cache/ping: ping_response: " + str(ping_response)
|
||||
)
|
||||
# making a set cache call
|
||||
# add cache does not return anything
|
||||
await litellm.cache.async_add_cache(
|
||||
result="test_key",
|
||||
model="test-model",
|
||||
messages=[{"role": "user", "content": "test from litellm"}],
|
||||
)
|
||||
verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
|
||||
return {
|
||||
"status": "healthy",
|
||||
"cache_type": litellm.cache.type,
|
||||
"ping_response": True,
|
||||
"set_cache_response": "success",
|
||||
"litellm_cache_params": litellm_cache_params,
|
||||
"redis_cache_params": specific_cache_params,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status": "healthy",
|
||||
"cache_type": litellm.cache.type,
|
||||
"litellm_cache_params": litellm_cache_params,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)}).Cache parameters: {litellm_cache_params}.specific_cache_params: {specific_cache_params}",
|
||||
)
|
||||
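Client-side, a successful ping returns the cache type plus the stringified cache config (and, for redis, the result of the round-trip `async_add_cache` test). A sketch using the proxy address and master key from the docstrings in this file (`http://0.0.0.0:4000` / `sk-1234`, both placeholders):

```python
import requests

BASE_URL = "http://0.0.0.0:4000"               # placeholder proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder master key

resp = requests.get(f"{BASE_URL}/cache/ping", headers=HEADERS)
resp.raise_for_status()  # 503 if litellm.cache is None or the ping fails
health = resp.json()

print(health["status"])                # "healthy"
print(health["cache_type"])            # e.g. "redis"
print(health["litellm_cache_params"])  # stringified cache configuration
```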
|
||||
|
||||
@router.post(
|
||||
"/delete",
|
||||
tags=["caching"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_delete(request: Request):
|
||||
"""
|
||||
Endpoint for deleting a key from the cache. All responses from litellm proxy have `x-litellm-cache-key` in the headers
|
||||
|
||||
Parameters:
|
||||
- **keys**: *Optional[List[str]]* - A list of keys to delete from the cache. Example {"keys": ["key1", "key2"]}
|
||||
|
||||
```shell
|
||||
curl -X POST "http://0.0.0.0:4000/cache/delete" \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-d '{"keys": ["key1", "key2"]}'
|
||||
```
|
||||
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
|
||||
request_data = await request.json()
|
||||
keys = request_data.get("keys", None)
|
||||
|
||||
if litellm.cache.type == "redis":
|
||||
await litellm.cache.delete_cache_keys(keys=keys)
|
||||
return {
|
||||
"status": "success",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support deleting a key. only `redis` is supported",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache Delete Failed({str(e)})",
|
||||
)
|
||||
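Since every proxy response carries its cache key in the `x-litellm-cache-key` header (per the docstring above), a client can evict exactly the entry it just created. A sketch with the same placeholder URL and key; the model name is illustrative:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# 1. Make a normal chat request and capture its cache key from the headers.
chat = requests.post(
    f"{BASE_URL}/chat/completions",
    headers=HEADERS,
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
cache_key = chat.headers.get("x-litellm-cache-key")

# 2. Evict that entry. Deletion is redis-only; other cache types get HTTP 500.
if cache_key:
    deleted = requests.post(
        f"{BASE_URL}/cache/delete", headers=HEADERS, json={"keys": [cache_key]}
    )
    print(deleted.json())  # {"status": "success"}
```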
|
||||
|
||||
@router.get(
|
||||
"/redis/info",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_redis_info():
|
||||
"""
|
||||
Endpoint for getting /redis/info
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
if litellm.cache.type == "redis":
|
||||
client_list = litellm.cache.cache.client_list()
|
||||
redis_info = litellm.cache.cache.info()
|
||||
num_clients = len(client_list)
|
||||
return {
|
||||
"num_clients": num_clients,
|
||||
"clients": client_list,
|
||||
"info": redis_info,
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support flushing",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)})",
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/flushall",
|
||||
tags=["caching"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def cache_flushall():
|
||||
"""
|
||||
A function to flush all items from the cache. (All cached items are deleted by this call.)
|
||||
Raises HTTPException if the cache is not initialized or if the cache type does not support flushing.
|
||||
Returns a dictionary with the status of the operation.
|
||||
|
||||
Usage:
|
||||
```
|
||||
curl -X POST http://0.0.0.0:4000/cache/flushall -H "Authorization: Bearer sk-1234"
|
||||
```
|
||||
"""
|
||||
try:
|
||||
if litellm.cache is None:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Cache not initialized. litellm.cache is None"
|
||||
)
|
||||
if litellm.cache.type == "redis":
|
||||
litellm.cache.cache.flushall()
|
||||
return {
|
||||
"status": "success",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Cache type {litellm.cache.type} does not support flushing",
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Service Unhealthy ({str(e)})",
|
||||
)
|
|
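Rounding out the router, `/cache/redis/info` and `/cache/flushall` are both redis-only admin calls: the first surfaces connected clients plus the raw `INFO` payload, the second drops every cached entry. A combined sketch (same placeholder URL/key; flushall is destructive, so it is gated behind a prompt here):

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Inspect the redis backend behind the cache.
info = requests.get(f"{BASE_URL}/cache/redis/info", headers=HEADERS).json()
print(info["num_clients"], "redis clients connected")

# Destructive: deletes ALL cached entries.
if input("Flush the entire cache? [y/N] ").lower() == "y":
    print(requests.post(f"{BASE_URL}/cache/flushall", headers=HEADERS).json())
```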
@ -1,91 +0,0 @@
|
|||
from datetime import datetime
|
||||
from functools import wraps
|
||||
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
|
||||
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
|
||||
from fastapi import Request
|
||||
|
||||
|
||||
def management_endpoint_wrapper(func):
|
||||
"""
|
||||
This wrapper does the following:
|
||||
|
||||
1. Log I/O, Exceptions to OTEL
|
||||
2. Create an Audit log for success calls
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(*args, **kwargs):
|
||||
start_time = datetime.now()
|
||||
|
||||
try:
|
||||
result = await func(*args, **kwargs)
|
||||
end_time = datetime.now()
|
||||
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
user_api_key_dict: UserAPIKeyAuth = (
|
||||
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
|
||||
)
|
||||
parent_otel_span = user_api_key_dict.parent_otel_span
|
||||
if parent_otel_span is not None:
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
if open_telemetry_logger is not None:
|
||||
_http_request: Request = kwargs.get("http_request")
|
||||
if _http_request:
|
||||
_route = _http_request.url.path
|
||||
_request_body: dict = await _read_request_body(
|
||||
request=_http_request
|
||||
)
|
||||
_response = dict(result) if result is not None else None
|
||||
|
||||
logging_payload = ManagementEndpointLoggingPayload(
|
||||
route=_route,
|
||||
request_data=_request_body,
|
||||
response=_response,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
await open_telemetry_logger.async_management_endpoint_success_hook(
|
||||
logging_payload=logging_payload,
|
||||
parent_otel_span=parent_otel_span,
|
||||
)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
end_time = datetime.now()
|
||||
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
user_api_key_dict: UserAPIKeyAuth = (
|
||||
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
|
||||
)
|
||||
parent_otel_span = user_api_key_dict.parent_otel_span
|
||||
if parent_otel_span is not None:
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
if open_telemetry_logger is not None:
|
||||
_http_request: Request = kwargs.get("http_request")
|
||||
if _http_request:
|
||||
_route = _http_request.url.path
|
||||
_request_body: dict = await _read_request_body(
|
||||
request=_http_request
|
||||
)
|
||||
logging_payload = ManagementEndpointLoggingPayload(
|
||||
route=_route,
|
||||
request_data=_request_body,
|
||||
response=None,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
exception=e,
|
||||
)
|
||||
|
||||
await open_telemetry_logger.async_management_endpoint_failure_hook(
|
||||
logging_payload=logging_payload,
|
||||
parent_otel_span=parent_otel_span,
|
||||
)
|
||||
|
||||
raise e
|
||||
|
||||
return wrapper
|
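The deleted wrapper is an instance of a generic pattern: time the awaited call, then emit success or failure telemetry before returning or re-raising. A stripped-down sketch of that pattern with plain `logging` standing in for the OTEL hooks (the names below are illustrative, not litellm APIs):

```python
import logging
from datetime import datetime
from functools import wraps

logger = logging.getLogger("management_endpoints")


def timed_endpoint(func):
    """Time an async endpoint and log its outcome (sketch of the pattern)."""

    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()
        try:
            result = await func(*args, **kwargs)
            # success path: report the elapsed time, then hand the result back
            logger.info("%s succeeded in %s", func.__name__, datetime.now() - start_time)
            return result
        except Exception:
            # failure path: log the exception with timing, then re-raise
            logger.exception("%s failed after %s", func.__name__, datetime.now() - start_time)
            raise

    return wrapper
```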
478
litellm/proxy/health_endpoints/_health_endpoints.py
Normal file
|
@ -0,0 +1,478 @@
|
|||
from typing import Optional, Literal
|
||||
import litellm
|
||||
import os
|
||||
import asyncio
|
||||
import fastapi
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
from fastapi import Depends, Request, APIRouter, Header, status
|
||||
from litellm.proxy.health_check import perform_health_check
|
||||
from fastapi import HTTPException
|
||||
import copy
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy._types import (
|
||||
UserAPIKeyAuth,
|
||||
ProxyException,
|
||||
WebhookEvent,
|
||||
CallInfo,
|
||||
)
|
||||
|
||||
#### Health ENDPOINTS ####
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/test",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def test_endpoint(request: Request):
|
||||
"""
|
||||
[DEPRECATED] use `/health/liveliness` instead.
|
||||
|
||||
A test endpoint that pings the proxy server to check if it's healthy.
|
||||
|
||||
Parameters:
|
||||
request (Request): The incoming request.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the route of the request URL.
|
||||
"""
|
||||
# ping the proxy server to check if it's healthy
|
||||
return {"route": request.url.path}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/services",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def health_services_endpoint(
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
service: Literal[
|
||||
"slack_budget_alerts", "langfuse", "slack", "openmeter", "webhook", "email"
|
||||
] = fastapi.Query(description="Specify the service being hit."),
|
||||
):
|
||||
"""
|
||||
Hidden endpoint.
|
||||
|
||||
Used by the UI to let users check if slack alerting is working as expected.
|
||||
"""
|
||||
try:
|
||||
from litellm.proxy.proxy_server import (
|
||||
proxy_logging_obj,
|
||||
prisma_client,
|
||||
general_settings,
|
||||
)
|
||||
|
||||
if service is None:
|
||||
raise HTTPException(
|
||||
status_code=400, detail={"error": "Service must be specified."}
|
||||
)
|
||||
if service not in [
|
||||
"slack_budget_alerts",
|
||||
"email",
|
||||
"langfuse",
|
||||
"slack",
|
||||
"openmeter",
|
||||
"webhook",
|
||||
]:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Service must be in list. Service={service}. List={['slack_budget_alerts']}"
|
||||
},
|
||||
)
|
||||
|
||||
if service == "openmeter":
|
||||
_ = await litellm.acompletion(
|
||||
model="openai/litellm-mock-response-model",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||
user="litellm:/health/services",
|
||||
mock_response="This is a mock response",
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock LLM request made - check openmeter.",
|
||||
}
|
||||
|
||||
if service == "langfuse":
|
||||
from litellm.integrations.langfuse import LangFuseLogger
|
||||
|
||||
langfuse_logger = LangFuseLogger()
|
||||
langfuse_logger.Langfuse.auth_check()
|
||||
_ = litellm.completion(
|
||||
model="openai/litellm-mock-response-model",
|
||||
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||
user="litellm:/health/services",
|
||||
mock_response="This is a mock response",
|
||||
)
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock LLM request made - check langfuse.",
|
||||
}
|
||||
|
||||
if service == "webhook":
|
||||
user_info = CallInfo(
|
||||
token=user_api_key_dict.token or "",
|
||||
spend=1,
|
||||
max_budget=0,
|
||||
user_id=user_api_key_dict.user_id,
|
||||
key_alias=user_api_key_dict.key_alias,
|
||||
team_id=user_api_key_dict.team_id,
|
||||
)
|
||||
await proxy_logging_obj.budget_alerts(
|
||||
type="user_budget",
|
||||
user_info=user_info,
|
||||
)
|
||||
|
||||
if service == "slack" or service == "slack_budget_alerts":
|
||||
if "slack" in general_settings.get("alerting", []):
|
||||
# test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
|
||||
# check if user has opted into unique_alert_webhooks
|
||||
if (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url
|
||||
is not None
|
||||
):
|
||||
for (
|
||||
alert_type
|
||||
) in proxy_logging_obj.slack_alerting_instance.alert_to_webhook_url:
|
||||
"""
|
||||
"llm_exceptions",
|
||||
"llm_too_slow",
|
||||
"llm_requests_hanging",
|
||||
"budget_alerts",
|
||||
"db_exceptions",
|
||||
"""
|
||||
# only test alert if it's in active alert types
|
||||
if (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_types
|
||||
is not None
|
||||
and alert_type
|
||||
not in proxy_logging_obj.slack_alerting_instance.alert_types
|
||||
):
|
||||
continue
|
||||
test_message = "default test message"
|
||||
if alert_type == "llm_exceptions":
|
||||
test_message = f"LLM Exception test alert"
|
||||
elif alert_type == "llm_too_slow":
|
||||
test_message = f"LLM Too Slow test alert"
|
||||
elif alert_type == "llm_requests_hanging":
|
||||
test_message = f"LLM Requests Hanging test alert"
|
||||
elif alert_type == "budget_alerts":
|
||||
test_message = f"Budget Alert test alert"
|
||||
elif alert_type == "db_exceptions":
|
||||
test_message = f"DB Exception test alert"
|
||||
elif alert_type == "outage_alerts":
|
||||
test_message = f"Outage Alert Exception test alert"
|
||||
elif alert_type == "daily_reports":
|
||||
test_message = f"Daily Reports test alert"
|
||||
|
||||
await proxy_logging_obj.alerting_handler(
|
||||
message=test_message, level="Low", alert_type=alert_type
|
||||
)
|
||||
else:
|
||||
await proxy_logging_obj.alerting_handler(
|
||||
message="This is a test slack alert message",
|
||||
level="Low",
|
||||
alert_type="budget_alerts",
|
||||
)
|
||||
|
||||
if prisma_client is not None:
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_monthly_spend_report()
|
||||
)
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_weekly_spend_report()
|
||||
)
|
||||
|
||||
alert_types = (
|
||||
proxy_logging_obj.slack_alerting_instance.alert_types or []
|
||||
)
|
||||
alert_types = list(alert_types)
|
||||
return {
|
||||
"status": "success",
|
||||
"alert_types": alert_types,
|
||||
"message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail={
|
||||
"error": '"{}" not in proxy config: general_settings. Unable to test this.'.format(
|
||||
service
|
||||
)
|
||||
},
|
||||
)
|
||||
if service == "email":
|
||||
webhook_event = WebhookEvent(
|
||||
event="key_created",
|
||||
event_group="key",
|
||||
event_message="Test Email Alert",
|
||||
token=user_api_key_dict.token or "",
|
||||
key_alias="Email Test key (This is only a test alert key. DO NOT USE THIS IN PRODUCTION.)",
|
||||
spend=0,
|
||||
max_budget=0,
|
||||
user_id=user_api_key_dict.user_id,
|
||||
user_email=os.getenv("TEST_EMAIL_ADDRESS"),
|
||||
team_id=user_api_key_dict.team_id,
|
||||
)
|
||||
|
||||
# use create task - this can take 10 seconds. don't keep ui users waiting for notification to check their email
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
|
||||
webhook_event=webhook_event
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Mock Email Alert sent, verify Email Alert Received",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
|
||||
type="auth_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||
)
|
||||
elif isinstance(e, ProxyException):
|
||||
raise e
|
||||
raise ProxyException(
|
||||
message="Authentication Error, " + str(e),
|
||||
type="auth_error",
|
||||
param=getattr(e, "param", "None"),
|
||||
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
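A sketch of how the UI (or an admin script) exercises this endpoint, with the usual placeholder URL and master key; `service` must be one of the literals in the signature above:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Fire a test slack alert. Requires "slack" under general_settings.alerting;
# otherwise the endpoint responds with HTTP 422.
resp = requests.get(
    f"{BASE_URL}/health/services", headers=HEADERS, params={"service": "slack"}
)
print(resp.status_code, resp.json())
# e.g. 200 {"status": "success", "alert_types": [...],
#           "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel"}
```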
|
||||
|
||||
@router.get("/health", tags=["health"], dependencies=[Depends(user_api_key_auth)])
|
||||
async def health_endpoint(
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
model: Optional[str] = fastapi.Query(
|
||||
None, description="Specify the model name (optional)"
|
||||
),
|
||||
):
|
||||
"""
|
||||
🚨 USE `/health/liveliness` to health check the proxy 🚨
|
||||
|
||||
See more 👉 https://docs.litellm.ai/docs/proxy/health
|
||||
|
||||
|
||||
Check the health of all the endpoints in config.yaml
|
||||
|
||||
To run health checks in the background, add this to config.yaml:
|
||||
```
|
||||
general_settings:
|
||||
# ... other settings
|
||||
background_health_checks: True
|
||||
```
|
||||
else, the health checks will be run on models when /health is called.
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
health_check_results,
|
||||
use_background_health_checks,
|
||||
user_model,
|
||||
llm_model_list,
|
||||
)
|
||||
|
||||
try:
|
||||
if llm_model_list is None:
|
||||
# if no router set, check if user set a model using litellm --model ollama/llama2
|
||||
if user_model is not None:
|
||||
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
|
||||
model_list=[], cli_model=user_model
|
||||
)
|
||||
return {
|
||||
"healthy_endpoints": healthy_endpoints,
|
||||
"unhealthy_endpoints": unhealthy_endpoints,
|
||||
"healthy_count": len(healthy_endpoints),
|
||||
"unhealthy_count": len(unhealthy_endpoints),
|
||||
}
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail={"error": "Model list not initialized"},
|
||||
)
|
||||
_llm_model_list = copy.deepcopy(llm_model_list)
|
||||
### FILTER MODELS FOR ONLY THOSE USER HAS ACCESS TO ###
|
||||
if len(user_api_key_dict.models) > 0:
|
||||
allowed_model_names = user_api_key_dict.models
|
||||
else:
|
||||
allowed_model_names = []
|
||||
if use_background_health_checks:
|
||||
return health_check_results
|
||||
else:
|
||||
healthy_endpoints, unhealthy_endpoints = await perform_health_check(
|
||||
_llm_model_list, model
|
||||
)
|
||||
|
||||
return {
|
||||
"healthy_endpoints": healthy_endpoints,
|
||||
"unhealthy_endpoints": unhealthy_endpoints,
|
||||
"healthy_count": len(healthy_endpoints),
|
||||
"unhealthy_count": len(unhealthy_endpoints),
|
||||
}
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
raise e
|
||||
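From the client's side, `/health` returns the two endpoint lists plus their counts (or the cached results, when background health checks are enabled). A sketch with the placeholder URL and key; `model` is optional and narrows the check to one deployment:

```python
import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

resp = requests.get(
    f"{BASE_URL}/health",
    headers=HEADERS,
    params={"model": "gpt-3.5-turbo"},  # optional; omit to check every deployment
)
report = resp.json()
print(f"{report['healthy_count']} healthy / {report['unhealthy_count']} unhealthy")
for endpoint in report["unhealthy_endpoints"]:
    print("unhealthy:", endpoint)
```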
|
||||
|
||||
db_health_cache = {"status": "unknown", "last_updated": datetime.now()}
|
||||
|
||||
|
||||
def _db_health_readiness_check():
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
global db_health_cache
|
||||
|
||||
# Note - Intentionally don't try/except this so it raises an exception when it fails
|
||||
|
||||
# if timedelta is less than 2 minutes return DB Status
|
||||
time_diff = datetime.now() - db_health_cache["last_updated"]
|
||||
if db_health_cache["status"] != "unknown" and time_diff < timedelta(minutes=2):
|
||||
return db_health_cache
|
||||
prisma_client.health_check()
|
||||
db_health_cache = {"status": "connected", "last_updated": datetime.now()}
|
||||
return db_health_cache
|
||||
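The helper above is a small time-boxed cache: database pings are reused for two minutes so `/health/readiness` does not probe the DB on every request. The same pattern, generalized into a self-contained sketch:

```python
from datetime import datetime, timedelta
from typing import Any, Callable, Dict

_health_cache: Dict[str, Any] = {"status": "unknown", "last_updated": datetime.now()}


def cached_health_check(
    check: Callable[[], None], ttl: timedelta = timedelta(minutes=2)
) -> Dict[str, Any]:
    """Run `check()` at most once per `ttl`; reuse the last result otherwise."""
    global _health_cache
    age = datetime.now() - _health_cache["last_updated"]
    if _health_cache["status"] != "unknown" and age < ttl:
        return _health_cache  # still fresh - skip the expensive probe
    check()  # raises on failure, just like prisma_client.health_check()
    _health_cache = {"status": "connected", "last_updated": datetime.now()}
    return _health_cache
```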
|
||||
|
||||
@router.get(
|
||||
"/active/callbacks",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def active_callbacks():
|
||||
"""
|
||||
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
|
||||
"""
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj, general_settings
|
||||
|
||||
_alerting = str(general_settings.get("alerting"))
|
||||
# get success callbacks
|
||||
|
||||
litellm_callbacks = [str(x) for x in litellm.callbacks]
|
||||
litellm_input_callbacks = [str(x) for x in litellm.input_callback]
|
||||
litellm_failure_callbacks = [str(x) for x in litellm.failure_callback]
|
||||
litellm_success_callbacks = [str(x) for x in litellm.success_callback]
|
||||
litellm_async_success_callbacks = [str(x) for x in litellm._async_success_callback]
|
||||
litellm_async_failure_callbacks = [str(x) for x in litellm._async_failure_callback]
|
||||
litellm_async_input_callbacks = [str(x) for x in litellm._async_input_callback]
|
||||
|
||||
all_litellm_callbacks = (
|
||||
litellm_callbacks
|
||||
+ litellm_input_callbacks
|
||||
+ litellm_failure_callbacks
|
||||
+ litellm_success_callbacks
|
||||
+ litellm_async_success_callbacks
|
||||
+ litellm_async_failure_callbacks
|
||||
+ litellm_async_input_callbacks
|
||||
)
|
||||
|
||||
alerting = proxy_logging_obj.alerting
|
||||
_num_alerting = 0
|
||||
if alerting and isinstance(alerting, list):
|
||||
_num_alerting = len(alerting)
|
||||
|
||||
return {
|
||||
"alerting": _alerting,
|
||||
"litellm.callbacks": litellm_callbacks,
|
||||
"litellm.input_callback": litellm_input_callbacks,
|
||||
"litellm.failure_callback": litellm_failure_callbacks,
|
||||
"litellm.success_callback": litellm_success_callbacks,
|
||||
"litellm._async_success_callback": litellm_async_success_callbacks,
|
||||
"litellm._async_failure_callback": litellm_async_failure_callbacks,
|
||||
"litellm._async_input_callback": litellm_async_input_callbacks,
|
||||
"all_litellm_callbacks": all_litellm_callbacks,
|
||||
"num_callbacks": len(all_litellm_callbacks),
|
||||
"num_alerting": _num_alerting,
|
||||
}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/readiness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_readiness():
|
||||
"""
|
||||
Unprotected endpoint for checking if worker can receive requests
|
||||
"""
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj, prisma_client, version
|
||||
|
||||
try:
|
||||
# get success callback
|
||||
success_callback_names = []
|
||||
|
||||
try:
|
||||
# this was returning a JSON of the values in some of the callbacks
|
||||
# all we need is the callback name, hence we do str(callback)
|
||||
success_callback_names = [str(x) for x in litellm.success_callback]
|
||||
except Exception:
|
||||
# don't let this block the /health/readiness response, if we can't convert to str -> return litellm.success_callback
|
||||
success_callback_names = litellm.success_callback
|
||||
|
||||
# check Cache
|
||||
cache_type = None
|
||||
if litellm.cache is not None:
|
||||
from litellm.caching import RedisSemanticCache
|
||||
|
||||
cache_type = litellm.cache.type
|
||||
|
||||
if isinstance(litellm.cache.cache, RedisSemanticCache):
|
||||
# ping the cache
|
||||
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
|
||||
try:
|
||||
index_info = await litellm.cache.cache._index_info()
|
||||
except Exception as e:
|
||||
index_info = "index does not exist - error: " + str(e)
|
||||
cache_type = {"type": cache_type, "index_info": index_info}
|
||||
|
||||
# check DB
|
||||
if prisma_client is not None: # if db passed in, check if it's connected
|
||||
db_health_status = _db_health_readiness_check()
|
||||
return {
|
||||
"status": "healthy",
|
||||
"db": "connected",
|
||||
"cache": cache_type,
|
||||
"litellm_version": version,
|
||||
"success_callbacks": success_callback_names,
|
||||
**db_health_status,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status": "healthy",
|
||||
"db": "Not connected",
|
||||
"cache": cache_type,
|
||||
"litellm_version": version,
|
||||
"success_callbacks": success_callback_names,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/liveliness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_liveliness():
|
||||
"""
|
||||
Unprotected endpoint for checking if worker is alive
|
||||
"""
|
||||
return "I'm alive!"
|
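Together the two probes support a simple "wait until the worker is up" loop, sketched below with the usual placeholder URL and key:

```python
import time

import requests

BASE_URL = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

for _ in range(30):
    try:
        # liveliness: is the process serving requests at all?
        requests.get(f"{BASE_URL}/health/liveliness", headers=HEADERS, timeout=2).raise_for_status()
        # readiness: are the DB, cache, and callbacks wired up?
        ready = requests.get(f"{BASE_URL}/health/readiness", headers=HEADERS, timeout=2)
        if ready.ok and ready.json().get("status") == "healthy":
            print("proxy ready - db:", ready.json().get("db"), "cache:", ready.json().get("cache"))
            break
    except requests.RequestException:
        pass  # proxy not up yet
    time.sleep(1)
```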
926
litellm/proxy/management_endpoints/key_management_endpoints.py
Normal file
|
@ -0,0 +1,926 @@
|
|||
"""
|
||||
KEY MANAGEMENT
|
||||
|
||||
All /key management endpoints
|
||||
|
||||
/key/generate
|
||||
/key/info
|
||||
/key/update
|
||||
/key/delete
|
||||
"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import uuid
|
||||
import re
|
||||
import traceback
|
||||
import asyncio
|
||||
import secrets
|
||||
from typing import Optional, List
|
||||
import fastapi
|
||||
from fastapi import Depends, Request, APIRouter, Header, status
|
||||
from fastapi import HTTPException
|
||||
import litellm
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy._types import *
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/key/generate",
|
||||
tags=["key management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=GenerateKeyResponse,
|
||||
)
|
||||
async def generate_key_fn(
|
||||
data: GenerateKeyRequest,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
None,
|
||||
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Generate an API key based on the provided data.
|
||||
|
||||
Docs: https://docs.litellm.ai/docs/proxy/virtual_keys
|
||||
|
||||
Parameters:
|
||||
- duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||
- key_alias: Optional[str] - User defined key alias
|
||||
- team_id: Optional[str] - The team id of the key
|
||||
- user_id: Optional[str] - The user id of the key
|
||||
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
|
||||
- aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
|
||||
- config: Optional[dict] - any key-specific configs, overrides config in config.yaml
|
||||
- spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||
- send_invite_email: Optional[bool] - Whether to send an invite email to the user_id, with the generate key
|
||||
- max_budget: Optional[float] - Specify max budget for a given key.
|
||||
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
|
||||
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
|
||||
|
||||
Examples:
|
||||
|
||||
1. Allow users to turn on/off pii masking
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"permissions": {"allow_pii_controls": true}
|
||||
}'
|
||||
```
|
||||
|
||||
Returns:
|
||||
- key: (str) The generated api key
|
||||
- expires: (datetime) Datetime object for when key expires.
|
||||
- user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id.
|
||||
"""
    try:
        from litellm.proxy.proxy_server import (
            user_custom_key_generate,
            prisma_client,
            litellm_proxy_admin_name,
            general_settings,
            proxy_logging_obj,
            create_audit_log_for_update,
        )

        verbose_proxy_logger.debug("entered /key/generate")

        if user_custom_key_generate is not None:
            result = await user_custom_key_generate(data)
            decision = result.get("decision", True)
            message = result.get("message", "Authentication Failed - Custom Auth Rule")
            if not decision:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN, detail=message
                )
        # check if user set default key/generate params on config.yaml
        if litellm.default_key_generate_params is not None:
            for elem in data:
                key, value = elem
                if value is None and key in [
                    "max_budget",
                    "user_id",
                    "team_id",
                    "max_parallel_requests",
                    "tpm_limit",
                    "rpm_limit",
                    "budget_duration",
                ]:
                    setattr(
                        data, key, litellm.default_key_generate_params.get(key, None)
                    )
                elif key == "models" and value == []:
                    setattr(data, key, litellm.default_key_generate_params.get(key, []))
                elif key == "metadata" and value == {}:
                    setattr(data, key, litellm.default_key_generate_params.get(key, {}))

        # check if user set upperbound key/generate params on config.yaml
        if litellm.upperbound_key_generate_params is not None:
            for elem in data:
                # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
                key, value = elem
                if (
                    value is not None
                    and getattr(litellm.upperbound_key_generate_params, key, None)
                    is not None
                ):
                    # if value is float/int
                    if key in [
                        "max_budget",
                        "max_parallel_requests",
                        "tpm_limit",
                        "rpm_limit",
                    ]:
                        if value > getattr(litellm.upperbound_key_generate_params, key):
                            raise HTTPException(
                                status_code=400,
                                detail={
                                    "error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
                                },
                            )
                    elif key == "budget_duration":
                        # budget durations are strings like "30s", "30m", "30h", "30d"
                        # compare the duration in seconds and max duration in seconds
                        upperbound_budget_duration = _duration_in_seconds(
                            duration=getattr(
                                litellm.upperbound_key_generate_params, key
                            )
                        )
                        user_set_budget_duration = _duration_in_seconds(duration=value)
                        if user_set_budget_duration > upperbound_budget_duration:
                            raise HTTPException(
                                status_code=400,
                                detail={
                                    "error": f"Budget duration is over max limit set in config - user_value={user_set_budget_duration}; max_value={upperbound_budget_duration}"
                                },
                            )

        # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable
        _budget_id = None
        if prisma_client is not None and data.soft_budget is not None:
            # create the Budget Row for the LiteLLM Verification Token
            budget_row = LiteLLM_BudgetTable(
                soft_budget=data.soft_budget,
                model_max_budget=data.model_max_budget or {},
            )
            new_budget = prisma_client.jsonify_object(
                budget_row.json(exclude_none=True)
            )

            _budget = await prisma_client.db.litellm_budgettable.create(
                data={
                    **new_budget,  # type: ignore
                    "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                    "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                }
            )
            _budget_id = getattr(_budget, "budget_id", None)
        data_json = data.json()  # type: ignore
        # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
        if "max_budget" in data_json:
            data_json["key_max_budget"] = data_json.pop("max_budget", None)
        if _budget_id is not None:
            data_json["budget_id"] = _budget_id

        if "budget_duration" in data_json:
            data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

        response = await generate_key_helper_fn(
            request_type="key", **data_json, table_name="key"
        )

        response["soft_budget"] = data.soft_budget  # include the user-input soft budget in the response

        if data.send_invite_email is True:
            if "email" not in general_settings.get("alerting", []):
                raise ValueError(
                    "Email alerting not setup on config.yaml. Please set `alerting=['email']`.\nDocs: https://docs.litellm.ai/docs/proxy/email"
                )
            event = WebhookEvent(
                event="key_created",
                event_group="key",
                event_message="API Key Created",
                token=response.get("token", ""),
                spend=response.get("spend", 0.0),
                max_budget=response.get("max_budget", 0.0),
                user_id=response.get("user_id", None),
                team_id=response.get("team_id", "Default Team"),
                key_alias=response.get("key_alias", None),
            )

            # If user configured email alerting - send an Email letting their end-user know the key was created
            asyncio.create_task(
                proxy_logging_obj.slack_alerting_instance.send_key_created_or_user_invited_email(
                    webhook_event=event,
                )
            )

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
            _updated_values = json.dumps(response, default=str)
            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.KEY_TABLE_NAME,
                        object_id=response.get("token_id", ""),
                        action="created",
                        updated_values=_updated_values,
                        before_value=None,
                    )
                )
            )

        return GenerateKeyResponse(**response)
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.generate_key_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
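
# Hedged usage sketch (illustrative, not part of the route above): generating a
# key with an expiry, a budget, and a model allow-list. Values are examples only.
#
#   curl --location 'http://0.0.0.0:8000/key/generate' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"duration": "30d", "max_budget": 10.0, "models": ["gpt-3.5-turbo"]}'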


@router.post(
    "/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(
    request: Request,
    data: UpdateKeyRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Update an existing key.
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
        user_api_key_cache,
    )

    try:
        data_json: dict = data.json()
        key = data_json.pop("key")
        # get the row from db
        if prisma_client is None:
            raise Exception("Not connected to DB!")

        existing_key_row = await prisma_client.get_data(
            token=data.key, table_name="key", query_type="find_unique"
        )

        if existing_key_row is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Key not found, passed key={data.key}"},
            )

        # update based on the remaining, non-default values passed in
        non_default_values = {}
        for k, v in data_json.items():
            if v is not None and v not in (
                [],
                {},
                0,
            ):  # models default to [], spend defaults to 0, we should not reset these values
                non_default_values[k] = v

        if "duration" in non_default_values:
            duration = non_default_values.pop("duration")
            duration_s = _duration_in_seconds(duration=duration)
            expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
            non_default_values["expires"] = expires

        response = await prisma_client.update_data(
            token=key, data={**non_default_values, "token": key}
        )

        # Delete - key from cache, since it's been updated!
        # key updated - a new model could have been added to this key. it should not block requests after this is done
        user_api_key_cache.delete_cache(key)
        hashed_token = hash_token(key)
        user_api_key_cache.delete_cache(hashed_token)

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
            _updated_values = json.dumps(data_json, default=str)

            _before_value = existing_key_row.json(exclude_none=True)
            _before_value = json.dumps(_before_value, default=str)

            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.KEY_TABLE_NAME,
                        object_id=data.key,
                        action="updated",
                        updated_values=_updated_values,
                        before_value=_before_value,
                    )
                )
            )

        return {"key": key, **response["data"]}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
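
# Hedged usage sketch (illustrative): updating a key's budget and allowed
# models; non-default values replace the stored ones. The key below is a placeholder.
#
#   curl --location 'http://0.0.0.0:8000/key/update' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"key": "sk-example", "max_budget": 25.0, "models": ["gpt-4"]}'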


@router.post(
    "/key/delete", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def delete_key_fn(
    data: KeyRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Delete a key from the key management system.

    Parameters:
    - keys (List[str]): A list of keys or hashed keys to delete. Example {"keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}

    Returns:
    - deleted_keys (List[str]): A list of deleted keys. Example {"deleted_keys": ["sk-QWrxEynunsNpV1zT48HIrw", "837e17519f44683334df5291321d97b8bf1098cd490e49e215f6fea935aa28be"]}

    Raises:
        HTTPException: If an error occurs during key deletion.
    """
    try:
        from litellm.proxy.proxy_server import (
            user_custom_key_generate,
            prisma_client,
            litellm_proxy_admin_name,
            general_settings,
            proxy_logging_obj,
            create_audit_log_for_update,
            user_api_key_cache,
        )

        keys = data.keys
        if len(keys) == 0:
            raise ProxyException(
                message=f"No keys provided, passed in: keys={keys}",
                type="auth_error",
                param="keys",
                code=status.HTTP_400_BAD_REQUEST,
            )

        ## only allow user to delete keys they own
        user_id = user_api_key_dict.user_id
        verbose_proxy_logger.debug(
            f"user_api_key_dict.user_role: {user_api_key_dict.user_role}"
        )
        if (
            user_api_key_dict.user_role is not None
            and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
        ):
            user_id = None  # unless they're admin

        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        # we do this after validation, so audit logs are only inserted once validation passes
        if litellm.store_audit_logs is True:
            # make an audit log for each key deleted
            for key in data.keys:
                key_row = await prisma_client.get_data(  # type: ignore
                    token=key, table_name="key", query_type="find_unique"
                )

                key_row = key_row.json(exclude_none=True)
                _key_row = json.dumps(key_row, default=str)

                asyncio.create_task(
                    create_audit_log_for_update(
                        request_data=LiteLLM_AuditLogs(
                            id=str(uuid.uuid4()),
                            updated_at=datetime.now(timezone.utc),
                            changed_by=litellm_changed_by
                            or user_api_key_dict.user_id
                            or litellm_proxy_admin_name,
                            changed_by_api_key=user_api_key_dict.api_key,
                            table_name=LitellmTableNames.KEY_TABLE_NAME,
                            object_id=key,
                            action="deleted",
                            updated_values="{}",
                            before_value=_key_row,
                        )
                    )
                )

        number_deleted_keys = await delete_verification_token(
            tokens=keys, user_id=user_id
        )
        verbose_proxy_logger.debug(
            f"/key/delete - deleted_keys={number_deleted_keys['deleted_keys']}"
        )

        try:
            assert len(keys) == number_deleted_keys["deleted_keys"]
        except Exception:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"Not all keys passed in were deleted. This probably means you don't have access to delete all the keys passed in. Keys passed in={len(keys)}, Deleted keys={number_deleted_keys['deleted_keys']}"
                },
            )

        for key in keys:
            user_api_key_cache.delete_cache(key)
            # remove hash token from cache
            hashed_token = hash_token(key)
            user_api_key_cache.delete_cache(hashed_token)

        verbose_proxy_logger.debug(
            f"/keys/delete - cache after delete: {user_api_key_cache.in_memory_cache.cache_dict}"
        )

        return {"deleted_keys": keys}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
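
# Hedged usage sketch (illustrative): deleting keys by value or by hash, as
# described in the docstring above.
#
#   curl --location 'http://0.0.0.0:8000/key/delete' \
#   --header 'Authorization: Bearer sk-1234' \
#   --header 'Content-Type: application/json' \
#   --data '{"keys": ["sk-QWrxEynunsNpV1zT48HIrw"]}'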


@router.post(
    "/v2/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn_v2(
    data: Optional[KeyRequest] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Retrieve information about a list of keys.

    **New endpoint**. Currently admin only.
    Parameters:
        keys: Optional[list] = body parameter representing the key(s) in the request
        user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
    Returns:
        Dict containing the key and its associated information

    Example Curl:
    ```
    curl -X POST "http://0.0.0.0:8000/v2/key/info" \
    -H "Authorization: Bearer sk-1234" \
    -d '{"keys": ["sk-1", "sk-2", "sk-3"]}'
    ```
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
    )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        if data is None:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail={"message": "Malformed request. No keys passed in."},
            )

        key_info = await prisma_client.get_data(
            token=data.keys, table_name="key", query_type="find_all"
        )
        filtered_key_info = []
        for k in key_info:
            try:
                k = k.model_dump()  # noqa
            except Exception:
                # if using pydantic v1
                k = k.dict()
            filtered_key_info.append(k)
        return {"key": data.keys, "info": filtered_key_info}

    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )


@router.get(
    "/key/info", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def info_key_fn(
    key: Optional[str] = fastapi.Query(
        default=None, description="Key in the request parameters"
    ),
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Retrieve information about a key.
    Parameters:
        key: Optional[str] = Query parameter representing the key in the request
        user_api_key_dict: UserAPIKeyAuth = Dependency representing the user's API key
    Returns:
        Dict containing the key and its associated information

    Example Curl:
    ```
    curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
    -H "Authorization: Bearer sk-1234"
    ```

    Example Curl - if no key is passed, it will use the Key Passed in Authorization Header
    ```
    curl -X GET "http://0.0.0.0:8000/key/info" \
    -H "Authorization: Bearer sk-02Wr4IAlN3NvPXvL5JVvDA"
    ```
    """
    from litellm.proxy.proxy_server import (
        user_custom_key_generate,
        prisma_client,
        litellm_proxy_admin_name,
        general_settings,
        proxy_logging_obj,
        create_audit_log_for_update,
    )

    try:
        if prisma_client is None:
            raise Exception(
                "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        if key is None:
            key = user_api_key_dict.api_key
        key_info = await prisma_client.get_data(token=key)
        ## REMOVE HASHED TOKEN INFO BEFORE RETURNING ##
        try:
            key_info = key_info.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            key_info = key_info.dict()
        key_info.pop("token")
        return {"key": key, "info": key_info}
    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )


def _duration_in_seconds(duration: str):
    """Convert a duration string like "30s", "30m", "30h", "30d" into seconds."""
    match = re.match(r"(\d+)([smhd]?)", duration)
    if not match:
        raise ValueError("Invalid duration format")

    value, unit = match.groups()
    value = int(value)

    if unit == "s":
        return value
    elif unit == "m":
        return value * 60
    elif unit == "h":
        return value * 3600
    elif unit == "d":
        return value * 86400
    else:
        raise ValueError("Unsupported duration unit")


async def generate_key_helper_fn(
    request_type: Literal[
        "user", "key"
    ],  # identifies if this request is from /user/new or /key/generate
    duration: Optional[str],
    models: list,
    aliases: dict,
    config: dict,
    spend: float,
    key_max_budget: Optional[float] = None,  # key_max_budget is used to Budget Per key
    key_budget_duration: Optional[str] = None,
    budget_id: Optional[str] = None,  # budget id <-> LiteLLM_BudgetTable
    soft_budget: Optional[
        float
    ] = None,  # soft_budget is used to set soft Budgets Per user
    max_budget: Optional[float] = None,  # max_budget is used to Budget Per user
    budget_duration: Optional[str] = None,  # budget_duration is used to reset the Budget Per user
    token: Optional[str] = None,
    user_id: Optional[str] = None,
    team_id: Optional[str] = None,
    user_email: Optional[str] = None,
    user_role: Optional[str] = None,
    max_parallel_requests: Optional[int] = None,
    metadata: Optional[dict] = {},
    tpm_limit: Optional[int] = None,
    rpm_limit: Optional[int] = None,
    query_type: Literal["insert_data", "update_data"] = "insert_data",
    update_key_values: Optional[dict] = None,
    key_alias: Optional[str] = None,
    allowed_cache_controls: Optional[list] = [],
    permissions: Optional[dict] = {},
    model_max_budget: Optional[dict] = {},
    teams: Optional[list] = None,
    organization_id: Optional[str] = None,
    table_name: Optional[Literal["key", "user"]] = None,
    send_invite_email: Optional[bool] = None,
):
    from litellm.proxy.proxy_server import (
        prisma_client,
        custom_db_client,
        litellm_proxy_budget_name,
        premium_user,
    )

    if prisma_client is None and custom_db_client is None:
        raise Exception(
            "Connect Proxy to database to generate keys - https://docs.litellm.ai/docs/proxy/virtual_keys"
        )

    if token is None:
        token = f"sk-{secrets.token_urlsafe(16)}"

    if duration is None:  # allow tokens that never expire
        expires = None
    else:
        duration_s = _duration_in_seconds(duration=duration)
        expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    if key_budget_duration is None:  # one-time budget
        key_reset_at = None
    else:
        duration_s = _duration_in_seconds(duration=key_budget_duration)
        key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    if budget_duration is None:  # one-time budget
        reset_at = None
    else:
        duration_s = _duration_in_seconds(duration=budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

    aliases_json = json.dumps(aliases)
    config_json = json.dumps(config)
    permissions_json = json.dumps(permissions)
    metadata_json = json.dumps(metadata)
    model_max_budget_json = json.dumps(model_max_budget)

    try:
        # Create a new verification token (you may want to enhance this logic based on your needs)
        user_data = {
            "max_budget": max_budget,
            "user_email": user_email,
            "user_id": user_id,
            "team_id": team_id,
            "organization_id": organization_id,
            "user_role": user_role,
            "spend": spend,
            "models": models,
            "max_parallel_requests": max_parallel_requests,
            "tpm_limit": tpm_limit,
            "rpm_limit": rpm_limit,
            "budget_duration": budget_duration,
            "budget_reset_at": reset_at,
            "allowed_cache_controls": allowed_cache_controls,
        }
        if teams is not None:
            user_data["teams"] = teams
        key_data = {
            "token": token,
            "key_alias": key_alias,
            "expires": expires,
            "models": models,
            "aliases": aliases_json,
            "config": config_json,
            "spend": spend,
            "max_budget": key_max_budget,
            "user_id": user_id,
            "team_id": team_id,
            "max_parallel_requests": max_parallel_requests,
            "metadata": metadata_json,
            "tpm_limit": tpm_limit,
            "rpm_limit": rpm_limit,
            "budget_duration": key_budget_duration,
            "budget_reset_at": key_reset_at,
            "allowed_cache_controls": allowed_cache_controls,
            "permissions": permissions_json,
            "model_max_budget": model_max_budget_json,
            "budget_id": budget_id,
        }

        # allow user to disable storing the abbreviated key name (shown in UI, to help figure out which key spent how much)
        if litellm.get_secret("DISABLE_KEY_NAME", False) is not True:
            key_data["key_name"] = f"sk-...{token[-4:]}"
        saved_token = copy.deepcopy(key_data)
        if isinstance(saved_token["aliases"], str):
            saved_token["aliases"] = json.loads(saved_token["aliases"])
        if isinstance(saved_token["config"], str):
            saved_token["config"] = json.loads(saved_token["config"])
        if isinstance(saved_token["metadata"], str):
            saved_token["metadata"] = json.loads(saved_token["metadata"])
        if isinstance(saved_token["permissions"], str):
            if (
                "get_spend_routes" in saved_token["permissions"]
                and premium_user is not True
            ):
                raise ValueError(
                    "get_spend_routes permission is only available for LiteLLM Enterprise users"
                )

            saved_token["permissions"] = json.loads(saved_token["permissions"])
        if isinstance(saved_token["model_max_budget"], str):
            saved_token["model_max_budget"] = json.loads(
                saved_token["model_max_budget"]
            )

        if saved_token.get("expires", None) is not None and isinstance(
            saved_token["expires"], datetime
        ):
            saved_token["expires"] = saved_token["expires"].isoformat()
        if prisma_client is not None:
            if (
                table_name is None or table_name == "user"
            ):  # do not auto-create users for `/key/generate`
                ## CREATE USER (If necessary)
                if query_type == "insert_data":
                    user_row = await prisma_client.insert_data(
                        data=user_data, table_name="user"
                    )
                    ## use default user model list if no key-specific model list provided
                    if len(user_row.models) > 0 and len(key_data["models"]) == 0:  # type: ignore
                        key_data["models"] = user_row.models
                elif query_type == "update_data":
                    user_row = await prisma_client.update_data(
                        data=user_data,
                        table_name="user",
                        update_key_values=update_key_values,
                    )
            if user_id == litellm_proxy_budget_name or (
                table_name is not None and table_name == "user"
            ):
                # do not create a key for litellm_proxy_budget_name or if table name is set to just 'user'
                # we only need to ensure this exists in the user table
                # the LiteLLM_VerificationToken table will increase in size if we don't do this check
                return user_data

            ## CREATE KEY
            verbose_proxy_logger.debug("prisma_client: Creating Key= %s", key_data)
            create_key_response = await prisma_client.insert_data(
                data=key_data, table_name="key"
            )
            key_data["token_id"] = getattr(create_key_response, "token", None)
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise e
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": "Internal Server Error."},
        )

    # Add budget related info in key_data - this ensures it's returned
    key_data["budget_id"] = budget_id

    if request_type == "user":
        # if this is a /user/new request, update the key_data with user_data fields
        key_data.update(user_data)
    return key_data
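
# Hedged usage sketch (illustrative): a direct call to the helper, as
# /key/generate does internally. Argument values below are examples only.
#
#   response = await generate_key_helper_fn(
#       request_type="key",
#       duration="30d",
#       models=[],
#       aliases={},
#       config={},
#       spend=0.0,
#       key_max_budget=10.0,
#       table_name="key",
#   )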


async def delete_verification_token(tokens: List, user_id: Optional[str] = None):
    from litellm.proxy.proxy_server import prisma_client, litellm_proxy_admin_name

    try:
        if prisma_client:
            # check if admin making request - don't filter by user-id
            if user_id == litellm_proxy_admin_name:
                deleted_tokens = await prisma_client.delete_data(tokens=tokens)
            else:
                deleted_tokens = await prisma_client.delete_data(
                    tokens=tokens, user_id=user_id
                )
            _num_deleted_tokens = deleted_tokens.get("deleted_keys", 0)
            if _num_deleted_tokens != len(tokens):
                raise Exception(
                    "Failed to delete all tokens. Tried to delete tokens that don't belong to user: "
                    + str(user_id)
                )
        else:
            raise Exception("DB not connected. prisma_client is None")
    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.delete_verification_token(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        raise e
    return deleted_tokens
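
# Hedged usage sketch (illustrative): non-admin callers only delete keys they
# own; passing user_id=None (the admin path in delete_key_fn) skips the
# ownership filter.
#
#   result = await delete_verification_token(tokens=["sk-example"], user_id=None)
#   # expected shape: {"deleted_keys": <count>} (inferred from the checks above)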
899
litellm/proxy/management_endpoints/team_endpoints.py
Normal file

@@ -0,0 +1,899 @@
from typing import Optional, List
import fastapi
from fastapi import Depends, Request, APIRouter, Header, status
from fastapi import HTTPException
import copy
import json
import uuid
import litellm
import asyncio
from datetime import datetime, timedelta, timezone
from litellm._logging import verbose_proxy_logger
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
    UserAPIKeyAuth,
    LiteLLM_TeamTable,
    LiteLLM_ModelTable,
    LitellmUserRoles,
    NewTeamRequest,
    TeamMemberAddRequest,
    UpdateTeamRequest,
    BlockTeamRequest,
    DeleteTeamRequest,
    Member,
    LitellmTableNames,
    LiteLLM_AuditLogs,
    TeamMemberDeleteRequest,
    ProxyException,
    CommonProxyErrors,
)
from litellm.proxy.management_helpers.utils import (
    add_new_member,
    management_endpoint_wrapper,
)

router = APIRouter()
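
# Hedged sketch (illustrative): how this router is typically mounted on the
# main FastAPI app. `app` and the import alias are assumptions for illustration.
#
#   from fastapi import FastAPI
#   from litellm.proxy.management_endpoints.team_endpoints import router as team_router
#
#   app = FastAPI()
#   app.include_router(team_router)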


#### TEAM MANAGEMENT ####
@router.post(
    "/team/new",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
    data: NewTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Allow users to create a new team. Apply user permissions to their team.

    👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)

    Parameters:
    - team_alias: Optional[str] - User defined team alias
    - team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
    - members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

    Returns:
    - team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id.

    _deprecated_params:
    - admins: list - A list of user_id's for the admin role
    - users: list - A list of user_id's for the user role

    Example Request:
    ```
    curl --location 'http://0.0.0.0:4000/team/new' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
      "team_alias": "my-new-team_2",
      "members_with_roles": [{"role": "admin", "user_id": "user-1234"},
        {"role": "user", "user_id": "user-2434"}]
    }'
    ```

    ```
    curl --location 'http://0.0.0.0:4000/team/new' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
      "team_alias": "QA Prod Bot",
      "max_budget": 0.000000001,
      "budget_duration": "1d"
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        data.team_id = str(uuid.uuid4())
    else:
        # Check if team_id exists already
        _existing_team_id = await prisma_client.get_data(
            team_id=data.team_id, table_name="team", query_type="find_unique"
        )
        if _existing_team_id is not None:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"Team id = {data.team_id} already exists. Please use a different team id."
                },
            )

    if (
        user_api_key_dict.user_role is None
        or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
    ):  # don't restrict proxy admin
        if (
            data.tpm_limit is not None
            and user_api_key_dict.tpm_limit is not None
            and data.tpm_limit > user_api_key_dict.tpm_limit
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}. User role={user_api_key_dict.user_role}"
                },
            )

        if (
            data.rpm_limit is not None
            and user_api_key_dict.rpm_limit is not None
            and data.rpm_limit > user_api_key_dict.rpm_limit
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}. User role={user_api_key_dict.user_role}"
                },
            )

        if (
            data.max_budget is not None
            and user_api_key_dict.max_budget is not None
            and data.max_budget > user_api_key_dict.max_budget
        ):
            raise HTTPException(
                status_code=400,
                detail={
                    "error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}. User role={user_api_key_dict.user_role}"
                },
            )

        if data.models is not None and len(user_api_key_dict.models) > 0:
            for m in data.models:
                if m not in user_api_key_dict.models:
                    raise HTTPException(
                        status_code=400,
                        detail={
                            "error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}. User id={user_api_key_dict.user_id}"
                        },
                    )

    if user_api_key_dict.user_id is not None:
        creating_user_in_list = False
        for member in data.members_with_roles:
            if member.user_id == user_api_key_dict.user_id:
                creating_user_in_list = True

        if not creating_user_in_list:
            data.members_with_roles.append(
                Member(role="admin", user_id=user_api_key_dict.user_id)
            )

    ## ADD TO MODEL TABLE
    _model_id = None
    if data.model_aliases is not None and isinstance(data.model_aliases, dict):
        litellm_modeltable = LiteLLM_ModelTable(
            model_aliases=json.dumps(data.model_aliases),
            created_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
            updated_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
        )
        model_dict = await prisma_client.db.litellm_modeltable.create(
            {**litellm_modeltable.json(exclude_none=True)}  # type: ignore
        )  # type: ignore

        _model_id = model_dict.id

    ## ADD TO TEAM TABLE
    complete_team_data = LiteLLM_TeamTable(
        **data.json(),
        model_id=_model_id,
    )

    # If budget_duration is set, set `budget_reset_at`
    if complete_team_data.budget_duration is not None:
        duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
        complete_team_data.budget_reset_at = reset_at

    team_row = await prisma_client.insert_data(
        data=complete_team_data.json(exclude_none=True), table_name="team"
    )

    ## ADD TEAM ID TO USER TABLE ##
    for user in complete_team_data.members_with_roles:
        ## add team id to user row ##
        await prisma_client.update_data(
            user_id=user.user_id,
            data={"user_id": user.user_id, "teams": [team_row.team_id]},
            update_key_values_custom_query={
                "teams": {
                    "push": [team_row.team_id],
                }
            },
        )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _updated_values = complete_team_data.json(exclude_none=True)

        _updated_values = json.dumps(_updated_values, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
                request_data=LiteLLM_AuditLogs(
                    id=str(uuid.uuid4()),
                    updated_at=datetime.now(timezone.utc),
                    changed_by=litellm_changed_by
                    or user_api_key_dict.user_id
                    or litellm_proxy_admin_name,
                    changed_by_api_key=user_api_key_dict.api_key,
                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
                    object_id=data.team_id,
                    action="created",
                    updated_values=_updated_values,
                    before_value=None,
                )
            )
        )

    try:
        return team_row.model_dump()
    except Exception:
        # if using pydantic v1
        return team_row.dict()


@router.post(
    "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
    data: UpdateTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    Use `/team/member_add` AND `/team/member_delete` to add/remove team members.

    You can now update team budget / rate limits via /team/update

    Parameters:
    - team_id: str - The team id of the user. Required param.
    - team_alias: Optional[str] - User defined team alias
    - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
    - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
    - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
    - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.

    Example - update team TPM Limit

    ```
    curl --location 'http://0.0.0.0:8000/team/update' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_id": "litellm-test-client-id-new",
        "tpm_limit": 100
    }'
    ```

    Example - Update Team `max_budget` budget
    ```
    curl --location 'http://0.0.0.0:8000/team/update' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_id": "litellm-test-client-id-new",
        "max_budget": 10
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
    verbose_proxy_logger.debug("/team/update - %s", data)

    existing_team_row = await prisma_client.get_data(
        team_id=data.team_id, table_name="team", query_type="find_unique"
    )
    if existing_team_row is None:
        raise HTTPException(
            status_code=404,
            detail={"error": f"Team not found, passed team_id={data.team_id}"},
        )

    updated_kv = data.json(exclude_none=True)

    # Check budget_duration and budget_reset_at
    if data.budget_duration is not None:
        duration_s = _duration_in_seconds(duration=data.budget_duration)
        reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)

        # set the budget_reset_at in DB
        updated_kv["budget_reset_at"] = reset_at

    team_row = await prisma_client.update_data(
        update_key_values=updated_kv,
        data=updated_kv,
        table_name="team",
        team_id=data.team_id,
    )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
        _before_value = existing_team_row.json(exclude_none=True)
        _before_value = json.dumps(_before_value, default=str)
        _after_value: str = json.dumps(updated_kv, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
                request_data=LiteLLM_AuditLogs(
                    id=str(uuid.uuid4()),
                    updated_at=datetime.now(timezone.utc),
                    changed_by=litellm_changed_by
                    or user_api_key_dict.user_id
                    or litellm_proxy_admin_name,
                    changed_by_api_key=user_api_key_dict.api_key,
                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
                    object_id=data.team_id,
                    action="updated",
                    updated_values=_after_value,
                    before_value=_before_value,
                )
            )
        )

    return team_row


@router.post(
    "/team/member_add",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
    data: TeamMemberAddRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [BETA]

    Add new members (either via user_email or user_id) to a team

    If user doesn't exist, new user row will also be added to User Table

    ```
    curl -X POST 'http://0.0.0.0:4000/team/member_add' \
    -H 'Authorization: Bearer sk-1234' \
    -H 'Content-Type: application/json' \
    -d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": {"role": "user", "user_id": "krrish247652@berri.ai"}}'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.member is None:
        raise HTTPException(
            status_code=400, detail={"error": "No member/members passed in"}
        )

    existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={"team_id": data.team_id}
    )
    if existing_team_row is None:
        raise HTTPException(
            status_code=404,
            detail={
                "error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
            },
        )

    complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())

    if isinstance(data.member, Member):
        # add to team db
        new_member = data.member

        complete_team_data.members_with_roles.append(new_member)

    elif isinstance(data.member, List):
        # add to team db
        new_members = data.member

        complete_team_data.members_with_roles.extend(new_members)

    # ADD MEMBER TO TEAM
    _db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
    updated_team = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id},
        data={"members_with_roles": json.dumps(_db_team_members)},  # type: ignore
    )

    if isinstance(data.member, Member):
        await add_new_member(
            new_member=data.member,
            max_budget_in_team=data.max_budget_in_team,
            prisma_client=prisma_client,
            user_api_key_dict=user_api_key_dict,
            litellm_proxy_admin_name=litellm_proxy_admin_name,
            team_id=data.team_id,
        )
    elif isinstance(data.member, List):
        # collect the coroutines, then run them concurrently
        tasks: List = []
        for m in data.member:
            tasks.append(
                add_new_member(
                    new_member=m,
                    max_budget_in_team=data.max_budget_in_team,
                    prisma_client=prisma_client,
                    user_api_key_dict=user_api_key_dict,
                    litellm_proxy_admin_name=litellm_proxy_admin_name,
                    team_id=data.team_id,
                )
            )
        await asyncio.gather(*tasks)

    return updated_team
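
# Hedged usage sketch (illustrative): the `member` field also accepts a list,
# handled by the `isinstance(data.member, List)` branch above.
#
#   curl -X POST 'http://0.0.0.0:4000/team/member_add' \
#   -H 'Authorization: Bearer sk-1234' \
#   -H 'Content-Type: application/json' \
#   -d '{"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "member": [{"role": "user", "user_id": "krrish247652@berri.ai"}, {"role": "user", "user_id": "another-user@berri.ai"}]}'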


@router.post(
    "/team/member_delete",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
    data: TeamMemberDeleteRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [BETA]

    delete members (either via user_email or user_id) from a team

    If user doesn't exist, an exception will be raised
    ```
    curl -X POST 'http://0.0.0.0:8000/team/member_delete' \
    -H 'Authorization: Bearer sk-1234' \
    -H 'Content-Type: application/json' \
    -d '{
        "team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
        "user_id": "krrish247652@berri.ai"
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_id is None:
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.user_id is None and data.user_email is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "Either user_id or user_email needs to be passed in"},
        )

    _existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={"team_id": data.team_id}
    )

    if _existing_team_row is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "Team id={} does not exist in db".format(data.team_id)},
        )
    existing_team_row = LiteLLM_TeamTable(**_existing_team_row.model_dump())

    ## DELETE MEMBER FROM TEAM
    new_team_members: List[Member] = []
    for m in existing_team_row.members_with_roles:
        if (
            data.user_id is not None
            and m.user_id is not None
            and data.user_id == m.user_id
        ):
            continue
        elif (
            data.user_email is not None
            and m.user_email is not None
            and data.user_email == m.user_email
        ):
            continue
        new_team_members.append(m)
    existing_team_row.members_with_roles = new_team_members

    _db_new_team_members: List[dict] = [m.model_dump() for m in new_team_members]

    _ = await prisma_client.db.litellm_teamtable.update(
        where={
            "team_id": data.team_id,
        },
        data={"members_with_roles": json.dumps(_db_new_team_members)},  # type: ignore
    )

    ## DELETE TEAM ID from USER ROW, IF EXISTS ##
    # get user row
    key_val = {}
    if data.user_id is not None:
        key_val["user_id"] = data.user_id
    elif data.user_email is not None:
        key_val["user_email"] = data.user_email
    existing_user_rows = await prisma_client.db.litellm_usertable.find_many(
        where=key_val  # type: ignore
    )

    if existing_user_rows is not None and (
        isinstance(existing_user_rows, list) and len(existing_user_rows) > 0
    ):
        for existing_user in existing_user_rows:
            team_list = []
            if data.team_id in existing_user.teams:
                team_list = existing_user.teams
                team_list.remove(data.team_id)
                await prisma_client.db.litellm_usertable.update(
                    where={
                        "user_id": existing_user.user_id,
                    },
                    data={"teams": {"set": team_list}},
                )

    return existing_team_row


@router.post(
    "/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
    data: DeleteTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
        description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
    ),
):
    """
    delete team and associated team keys

    ```
    curl --location 'http://0.0.0.0:8000/team/delete' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data-raw '{
        "team_ids": ["45e3e396-ee08-4a61-a88e-16b3ce7e0849"]
    }'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data.team_ids is None:
        raise HTTPException(status_code=400, detail={"error": "No team ids passed in"})

    # check that all teams passed exist
    for team_id in data.team_ids:
        team_row = await prisma_client.get_data(  # type: ignore
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if team_row is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Team not found, passed team_id={team_id}"},
            )

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    # we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
    if litellm.store_audit_logs is True:
        # make an audit log for each team deleted
        for team_id in data.team_ids:
            team_row = await prisma_client.get_data(  # type: ignore
                team_id=team_id, table_name="team", query_type="find_unique"
            )

            _team_row = team_row.json(exclude_none=True)

            asyncio.create_task(
                create_audit_log_for_update(
                    request_data=LiteLLM_AuditLogs(
                        id=str(uuid.uuid4()),
                        updated_at=datetime.now(timezone.utc),
                        changed_by=litellm_changed_by
                        or user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                        changed_by_api_key=user_api_key_dict.api_key,
                        table_name=LitellmTableNames.TEAM_TABLE_NAME,
                        object_id=team_id,
                        action="deleted",
                        updated_values="{}",
                        before_value=_team_row,
                    )
                )
            )

    # End of Audit logging

    ## DELETE ASSOCIATED KEYS
    await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
    ## DELETE TEAMS
    deleted_teams = await prisma_client.delete_data(
        team_id_list=data.team_ids, table_name="team"
    )
    return deleted_teams


@router.get(
    "/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
    http_request: Request,
    team_id: str = fastapi.Query(
        default=None, description="Team ID in the request parameters"
    ),
):
    """
    get info on team + related keys

    ```
    curl --location 'http://localhost:4000/team/info?team_id=your_team_id' \
    --header 'Authorization: Bearer sk-1234'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    try:
        if prisma_client is None:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    "error": "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
                },
            )
        if team_id is None:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail={"message": "Malformed request. No team id passed in."},
            )

        team_info = await prisma_client.get_data(
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if team_info is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={"message": f"Team not found, passed team id: {team_id}."},
            )

        ## GET ALL KEYS ##
        keys = await prisma_client.get_data(
            team_id=team_id,
            table_name="key",
            query_type="find_all",
            expires=datetime.now(),
        )

        ## REMOVE HASHED TOKEN INFO before returning ##
        keys_in_team = []
        for key in keys:
            try:
                key = key.model_dump()  # noqa
            except Exception:
                # if using pydantic v1
                key = key.dict()
            key.pop("token", None)
            keys_in_team.append(key)
        return {"team_id": team_id, "team_info": team_info, "keys": keys_in_team}

    except Exception as e:
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
                type="auth_error",
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Authentication Error, " + str(e),
            type="auth_error",
            param=getattr(e, "param", "None"),
            code=status.HTTP_400_BAD_REQUEST,
        )
|
||||
|
||||
|
||||
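A client-side sketch of calling the endpoint above; the proxy URL and master key are placeholders, not values mandated by this diff:

```python
import requests

# Fetch a team plus its keys from the /team/info route defined above.
resp = requests.get(
    "http://localhost:4000/team/info",            # placeholder proxy base URL
    params={"team_id": "<team-id>"},              # query param read by fastapi.Query
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
)
resp.raise_for_status()
body = resp.json()
print(body["team_info"], len(body["keys"]))  # response shape per the return above
```
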
@router.post(
    "/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Blocks all calls from keys with this team id.
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise Exception("No DB Connected.")

    record = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id}, data={"blocked": True}  # type: ignore
    )

    return record


@router.post(
    "/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Unblocks all calls from keys with this team id.
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if prisma_client is None:
        raise Exception("No DB Connected.")

    record = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id}, data={"blocked": False}  # type: ignore
    )

    return record

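A hedged sketch of driving the two endpoints above; the base URL and key are placeholders, and `team_id` is the field `BlockTeamRequest` carries per the handlers:

```python
import requests

BASE = "http://localhost:4000"                 # placeholder proxy URL
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder master key

# Block every key under a team, then lift the block again.
requests.post(f"{BASE}/team/block", json={"team_id": "<team-id>"}, headers=HEADERS)
requests.post(f"{BASE}/team/unblock", json={"team_id": "<team-id>"}, headers=HEADERS)
```
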
@router.get(
    "/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [Admin-only] List all available teams

    ```
    curl --location --request GET 'http://0.0.0.0:4000/team/list' \
    --header 'Authorization: Bearer sk-1234'
    ```
    """
    from litellm.proxy.proxy_server import (
        prisma_client,
        litellm_proxy_admin_name,
        create_audit_log_for_update,
        _duration_in_seconds,
    )

    if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN:
        raise HTTPException(
            status_code=401,
            detail={
                "error": "Admin-only endpoint. Your user role={}".format(
                    user_api_key_dict.user_role
                )
            },
        )

    if prisma_client is None:
        raise HTTPException(
            status_code=400,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    response = await prisma_client.db.litellm_teamtable.find_many()

    return response

@@ -1,5 +1,11 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm._logging import verbose_logger
from fastapi import Request

from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient

@@ -61,3 +67,110 @@ async def add_new_member(
            "budget_id": _budget_id,
        }
    )


def management_endpoint_wrapper(func):
    """
    This wrapper does the following:

    1. Logs I/O and exceptions to OTEL
    2. Creates an audit log for successful calls
    """

    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()

        try:
            result = await func(*args, **kwargs)
            end_time = datetime.now()
            try:
                if kwargs is None:
                    kwargs = {}
                user_api_key_dict: UserAPIKeyAuth = (
                    kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
                )
                _http_request: Request = kwargs.get("http_request")
                parent_otel_span = user_api_key_dict.parent_otel_span
                if parent_otel_span is not None:
                    from litellm.proxy.proxy_server import open_telemetry_logger

                    if open_telemetry_logger is not None:
                        if _http_request:
                            _route = _http_request.url.path
                            _request_body: dict = await _read_request_body(
                                request=_http_request
                            )
                            _response = dict(result) if result is not None else None

                            logging_payload = ManagementEndpointLoggingPayload(
                                route=_route,
                                request_data=_request_body,
                                response=_response,
                                start_time=start_time,
                                end_time=end_time,
                            )

                            await open_telemetry_logger.async_management_endpoint_success_hook(
                                logging_payload=logging_payload,
                                parent_otel_span=parent_otel_span,
                            )

                if _http_request:
                    _route = _http_request.url.path
                    # Flush the user_api_key cache if this was an update/delete call to /key, /team, or /user
                    if _route in [
                        "/key/update",
                        "/key/delete",
                        "/team/update",
                        "/team/delete",
                        "/user/update",
                        "/user/delete",
                        "/customer/update",
                        "/customer/delete",
                    ]:
                        from litellm.proxy.proxy_server import user_api_key_cache

                        user_api_key_cache.flush_cache()
            except Exception as e:
                # Non-blocking exception - logging must never fail the request
                verbose_logger.debug("Error in management endpoint wrapper: %s", str(e))
                pass

            return result
        except Exception as e:
            end_time = datetime.now()

            if kwargs is None:
                kwargs = {}
            user_api_key_dict: UserAPIKeyAuth = (
                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
            )
            parent_otel_span = user_api_key_dict.parent_otel_span
            if parent_otel_span is not None:
                from litellm.proxy.proxy_server import open_telemetry_logger

                if open_telemetry_logger is not None:
                    _http_request: Request = kwargs.get("http_request")
                    if _http_request:
                        _route = _http_request.url.path
                        _request_body: dict = await _read_request_body(
                            request=_http_request
                        )
                        logging_payload = ManagementEndpointLoggingPayload(
                            route=_route,
                            request_data=_request_body,
                            response=None,
                            start_time=start_time,
                            end_time=end_time,
                            exception=e,
                        )

                        await open_telemetry_logger.async_management_endpoint_failure_hook(
                            logging_payload=logging_payload,
                            parent_otel_span=parent_otel_span,
                        )

            raise e

    return wrapper

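The team endpoints earlier in this diff are all wrapped with this decorator. A minimal sketch of how it attaches to a route; the route name and body here are illustrative only:

```python
from fastapi import APIRouter, Request

router = APIRouter()


@router.get("/example/info")  # hypothetical route, for illustration only
@management_endpoint_wrapper
async def example_info(http_request: Request):
    # The wrapper times this call, emits an OTEL success/failure event when a
    # parent span exists, and flushes the user_api_key cache on update/delete routes.
    return {"status": "ok"}
```
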
@@ -20,6 +20,7 @@ model_list:

general_settings:
  master_key: sk-1234
  alerting: ["slack", "email"]

litellm_settings:
  success_callback: ["prometheus"]

File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -12,6 +12,8 @@ import litellm
import backoff
import traceback
from pydantic import BaseModel
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm.proxy._types import (
    UserAPIKeyAuth,
    DynamoDBArgs,

@@ -266,7 +268,9 @@ class ProxyLogging:
                + litellm.failure_callback
            )
        )
        litellm.utils.set_callbacks(callback_list=callback_list)
        litellm.litellm_core_utils.litellm_logging.set_callbacks(
            callback_list=callback_list
        )

    # The actual implementation of the function
    async def pre_call_hook(

@@ -331,7 +335,9 @@ class ProxyLogging:
            return data
        except Exception as e:
            if "litellm_logging_obj" in data:
                logging_obj: litellm.utils.Logging = data["litellm_logging_obj"]
                logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
                    "litellm_logging_obj"
                ]

                ## ASYNC FAILURE HANDLER ##
                error_message = ""

@@ -455,6 +461,7 @@ class ProxyLogging:
        formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

        extra_kwargs = {}
        alerting_metadata = {}
        if request_data is not None:
            _url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
                request_data=request_data

@@ -462,7 +469,12 @@ class ProxyLogging:
            if _url is not None:
                extra_kwargs["🪢 Langfuse Trace"] = _url
                formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)

            if (
                "metadata" in request_data
                and request_data["metadata"].get("alerting_metadata", None) is not None
                and isinstance(request_data["metadata"]["alerting_metadata"], dict)
            ):
                alerting_metadata = request_data["metadata"]["alerting_metadata"]
        for client in self.alerting:
            if client == "slack":
                await self.slack_alerting_instance.send_alert(

@@ -470,6 +482,7 @@ class ProxyLogging:
                    level=level,
                    alert_type=alert_type,
                    user_info=None,
                    alerting_metadata=alerting_metadata,
                    **extra_kwargs,
                )
            elif client == "sentry":

@@ -510,7 +523,7 @@ class ProxyLogging:
            )

            if hasattr(self, "service_logging_obj"):
                self.service_logging_obj.async_service_failure_hook(
                await self.service_logging_obj.async_service_failure_hook(
                    service=ServiceTypes.DB,
                    duration=duration,
                    error=error_message,

@@ -1960,6 +1973,9 @@ async def send_email(receiver_email, subject, html):
    email_message["From"] = sender_email
    email_message["To"] = receiver_email
    email_message["Subject"] = subject
    verbose_proxy_logger.debug(
        "sending email from %s to %s", sender_email, receiver_email
    )

    # Attach the body to the email
    email_message.attach(MIMEText(html, "html"))
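Per the `alerting_metadata` branch above, callers can attach custom key/value pairs that are forwarded into the alert. A hedged sketch of the request shape it reads (the exact surrounding payload is an assumption; it mirrors the test added later in this diff):

```python
# Request data carrying alerting_metadata; the ProxyLogging hook above
# copies this dict into the Slack alert it sends.
request_data = {
    "model": "gpt-3.5-turbo",
    "metadata": {"alerting_metadata": {"hello": "world"}},
}
```
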
@@ -2115,6 +2131,16 @@ def _extract_from_regex(duration: str) -> Tuple[int, str]:
    return value, unit


def get_last_day_of_month(year, month):
    # Handle December case
    if month == 12:
        return 31
    # First day of the following month, minus one day, is the last day of this month
    next_month = datetime(year=year, month=month + 1, day=1)
    last_day_of_month = (next_month - timedelta(days=1)).day
    return last_day_of_month


def _duration_in_seconds(duration: str) -> int:
    """
    Parameters:
@@ -2141,12 +2167,28 @@ def _duration_in_seconds(duration: str) -> int:
        now = time.time()
        current_time = datetime.fromtimestamp(now)

        # Calculate the first day of the next month
        if current_time.month == 12:
            next_month = datetime(year=current_time.year + 1, month=1, day=1)
            target_year = current_time.year + 1
            target_month = 1
        else:
            target_year = current_time.year
            target_month = current_time.month + value

        # Determine the day to set for next month
        target_day = current_time.day
        last_day_of_target_month = get_last_day_of_month(target_year, target_month)

        if target_day > last_day_of_target_month:
            target_day = last_day_of_target_month

        next_month = datetime(
            year=current_time.year, month=current_time.month + value, day=1
            year=target_year,
            month=target_month,
            day=target_day,
            hour=current_time.hour,
            minute=current_time.minute,
            second=current_time.second,
            microsecond=current_time.microsecond,
        )

        # Calculate the duration until the first day of the next month
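A worked check of the day-clamping above; values are computed from the logic as written, and the snippet is an illustration, not part of the diff:

```python
from datetime import datetime, timedelta

# Last day of a month = first day of the next month minus one day,
# matching get_last_day_of_month above.
last_day_feb_2024 = (datetime(2024, 3, 1) - timedelta(days=1)).day
assert last_day_feb_2024 == 29  # leap year

# With value=1 and current date Jan 31, 2024: target_day (31) exceeds the
# last day of February (29), so the clamp sets target_day = 29.
target_day = min(31, last_day_feb_2024)
assert target_day == 29
```
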
@@ -2718,47 +2760,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
    return


def _is_user_proxy_admin(user_id_information: Optional[list]):
    if user_id_information is None:
        return False

    if len(user_id_information) == 0 or user_id_information[0] is None:
        return False

    _user = user_id_information[0]
    if (
        _user.get("user_role", None) is not None
        and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
    ):
        return True

    # if user_id_information contains litellm-proxy-budget
    # get first user_id that is not litellm-proxy-budget
    for user in user_id_information:
        if user.get("user_id") != "litellm-proxy-budget":
            _user = user
            break

    if (
        _user.get("user_role", None) is not None
        and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
    ):
        return True

    return False


def _get_user_role(user_id_information: Optional[list]):
    if user_id_information is None:
        return None

    if len(user_id_information) == 0 or user_id_information[0] is None:
        return None

    _user = user_id_information[0]
    return _user.get("user_role")


def encrypt_value(value: str, master_key: str):
    import hashlib
    import nacl.secret

@@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
)
from litellm.scheduler import Scheduler, FlowItem
from typing import Iterable
from litellm.router_utils.handle_error import send_llm_exception_alert


class Router:

@@ -108,6 +109,7 @@ class Router:
        ] = None,  # generic fallbacks, works across all deployments
        fallbacks: List = [],
        context_window_fallbacks: List = [],
        content_policy_fallbacks: List = [],
        model_group_alias: Optional[dict] = {},
        enable_pre_call_checks: bool = False,
        retry_after: int = 0,  # min time to wait before retrying a failed request

@@ -311,6 +313,12 @@ class Router:
        self.context_window_fallbacks = (
            context_window_fallbacks or litellm.context_window_fallbacks
        )

        _content_policy_fallbacks = (
            content_policy_fallbacks or litellm.content_policy_fallbacks
        )
        self.validate_fallbacks(fallback_param=_content_policy_fallbacks)
        self.content_policy_fallbacks = _content_policy_fallbacks
        self.total_calls: defaultdict = defaultdict(
            int
        )  # dict to store total calls made to each model
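The new `content_policy_fallbacks` parameter uses the same shape as the existing fallback lists (the `[{"gpt-3.5-turbo": ["gpt-4"]}]` format noted in the comments further down). A hedged construction sketch; the model names are placeholders:

```python
import litellm

# Route content-policy violations on one model group to another.
router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "my-fallback-model", "litellm_params": {"model": "gpt-4"}},
    ],
    content_policy_fallbacks=[{"gpt-3.5-turbo": ["my-fallback-model"]}],
)
```
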
@@ -576,6 +584,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _acompletion(

@@ -1097,6 +1113,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _aimage_generation(self, prompt: str, model: str, **kwargs):

@@ -1221,6 +1245,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _atranscription(self, file: BinaryIO, model: str, **kwargs):

@@ -1387,6 +1419,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def amoderation(self, model: str, input: str, **kwargs):

@@ -1402,6 +1442,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _amoderation(self, model: str, input: str, **kwargs):

@@ -1546,6 +1594,14 @@ class Router:

            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _atext_completion(self, model: str, prompt: str, **kwargs):

@@ -1741,6 +1797,14 @@ class Router:
            response = await self.async_function_with_fallbacks(**kwargs)
            return response
        except Exception as e:
            asyncio.create_task(
                send_llm_exception_alert(
                    litellm_router_instance=self,
                    request_kwargs=kwargs,
                    error_traceback_str=traceback.format_exc(),
                    original_exception=e,
                )
            )
            raise e

    async def _aembedding(self, input: Union[str, List], model: str, **kwargs):

@@ -1998,6 +2062,9 @@ class Router:
        context_window_fallbacks = kwargs.get(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.get(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )
        try:
            if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
                raise Exception(

@@ -2016,7 +2083,10 @@ class Router:
                if (
                    hasattr(e, "status_code")
                    and e.status_code == 400  # type: ignore
                    and not isinstance(e, litellm.ContextWindowExceededError)
                    and not (
                        isinstance(e, litellm.ContextWindowExceededError)
                        or isinstance(e, litellm.ContentPolicyViolationError)
                    )
                ):  # don't retry a malformed request
                    raise e
                if (

@@ -2034,6 +2104,39 @@ class Router:
                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment
                        """
                        try:
                            kwargs["model"] = mg
                            kwargs.setdefault("metadata", {}).update(
                                {"model_group": mg}
                            )  # update model_group used, if fallbacks are done
                            response = await self.async_function_with_retries(
                                *args, **kwargs
                            )
                            verbose_router_logger.info(
                                "Successful fallback b/w models."
                            )
                            return response
                        except Exception as e:
                            pass
                elif (
                    isinstance(e, litellm.ContentPolicyViolationError)
                    and content_policy_fallbacks is not None
                ):
                    fallback_model_group = None
                    for (
                        item
                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
                        if list(item.keys())[0] == model_group:
                            fallback_model_group = item[model_group]
                            break

                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment

@@ -2114,6 +2217,9 @@ class Router:
        context_window_fallbacks = kwargs.pop(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.pop(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )

        num_retries = kwargs.pop("num_retries")

@@ -2141,6 +2247,7 @@ class Router:
                healthy_deployments=_healthy_deployments,
                context_window_fallbacks=context_window_fallbacks,
                regular_fallbacks=fallbacks,
                content_policy_fallbacks=content_policy_fallbacks,
            )

            # decides how long to sleep before retry

@@ -2206,10 +2313,12 @@ class Router:
        error: Exception,
        healthy_deployments: Optional[List] = None,
        context_window_fallbacks: Optional[List] = None,
        content_policy_fallbacks: Optional[List] = None,
        regular_fallbacks: Optional[List] = None,
    ):
        """
        1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
        2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None

        2. raise an exception for RateLimitError if
        - there are no fallbacks

@@ -2219,13 +2328,19 @@ class Router:
        if healthy_deployments is not None and isinstance(healthy_deployments, list):
            _num_healthy_deployments = len(healthy_deployments)

        ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
        ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error
        if (
            isinstance(error, litellm.ContextWindowExceededError)
            and context_window_fallbacks is not None
        ):
            raise error

        if (
            isinstance(error, litellm.ContentPolicyViolationError)
            and content_policy_fallbacks is not None
        ):
            raise error

        # Error we should only retry if there are other deployments
        if isinstance(error, openai.RateLimitError):
            if (

@@ -2256,6 +2371,9 @@ class Router:
        context_window_fallbacks = kwargs.get(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.get(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )
        try:
            if mock_testing_fallbacks is not None and mock_testing_fallbacks == True:
                raise Exception(

@@ -2271,7 +2389,10 @@ class Router:
                if (
                    hasattr(e, "status_code")
                    and e.status_code == 400  # type: ignore
                    and not isinstance(e, litellm.ContextWindowExceededError)
                    and not (
                        isinstance(e, litellm.ContextWindowExceededError)
                        or isinstance(e, litellm.ContentPolicyViolationError)
                    )
                ):  # don't retry a malformed request
                    raise e

@@ -2294,6 +2415,37 @@ class Router:
                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment
                        """
                        try:
                            ## LOGGING
                            kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
                            kwargs["model"] = mg
                            kwargs.setdefault("metadata", {}).update(
                                {"model_group": mg}
                            )  # update model_group used, if fallbacks are done
                            response = self.function_with_fallbacks(*args, **kwargs)
                            return response
                        except Exception as e:
                            pass
                elif (
                    isinstance(e, litellm.ContentPolicyViolationError)
                    and content_policy_fallbacks is not None
                ):
                    fallback_model_group = None

                    for (
                        item
                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
                        if list(item.keys())[0] == model_group:
                            fallback_model_group = item[model_group]
                            break

                    if fallback_model_group is None:
                        raise original_exception

                    for mg in fallback_model_group:
                        """
                        Iterate through the model groups and try calling that deployment

@@ -2400,6 +2552,9 @@ class Router:
        context_window_fallbacks = kwargs.pop(
            "context_window_fallbacks", self.context_window_fallbacks
        )
        content_policy_fallbacks = kwargs.pop(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )

        try:
            # if the function call is successful, no exception will be raised and we'll break out of the loop

@@ -2419,6 +2574,7 @@ class Router:
                healthy_deployments=_healthy_deployments,
                context_window_fallbacks=context_window_fallbacks,
                regular_fallbacks=fallbacks,
                content_policy_fallbacks=content_policy_fallbacks,
            )

            # decides how long to sleep before retry

@@ -4570,6 +4726,8 @@ class Router:
                default_webhook_url=router_alerting_config.webhook_url,
            )

            self.slack_alerting_logger = _slack_alerting_logger

            litellm.callbacks.append(_slack_alerting_logger)
            litellm.success_callback.append(
                _slack_alerting_logger.response_taking_too_long_callback

litellm/router_utils/handle_error.py (new file, 53 lines)
@@ -0,0 +1,53 @@
import asyncio
import traceback
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
):
    """
    Sends a Slack / MS Teams alert for the LLM API call failure.

    Parameters:
    litellm_router_instance (_Router): The LitellmRouter instance.
    original_exception (Any): The original exception that occurred.

    Returns:
    None
    """
    if litellm_router_instance is None:
        return

    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
        return

    if litellm_router_instance.slack_alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not send an alert for requests coming from the litellm proxy server;
        # the proxy is already instrumented to send LLM API call failures
        return

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        exception_str += litellm_debug_info
    exception_str += f"\n\n{error_traceback_str[:2000]}"

    await litellm_router_instance.slack_alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type="llm_exceptions",
    )

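The Router hunks above schedule this helper as a fire-and-forget task. A usage sketch mirroring that pattern, assuming a running event loop and a configured Router instance (`router` is a placeholder):

```python
import asyncio
import traceback

# Inside an except block, mirror the Router's fire-and-forget usage:
# the alert is sent in the background while the original error propagates.
try:
    raise RuntimeError("simulated LLM API failure")
except Exception as e:
    asyncio.create_task(
        send_llm_exception_alert(
            litellm_router_instance=router,  # placeholder Router instance
            request_kwargs={},
            error_traceback_str=traceback.format_exc(),
            original_exception=e,
        )
    )
    raise
```
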
@@ -25,6 +25,9 @@ import pytest
from litellm.router import AlertingConfig, Router
from litellm.proxy._types import CallInfo
from openai import APIError
from litellm.router import AlertingConfig
import litellm
import os


@pytest.mark.parametrize(

@@ -159,6 +162,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
        mock_send_alert.assert_awaited_once()


@pytest.mark.asyncio
async def test_alerting_metadata(slack_alerting):
    """
    Test alerting_metadata is propagated correctly for response taking too long
    """
    start_time = datetime.now()
    end_time = start_time + timedelta(seconds=301)
    kwargs = {
        "model": "test_model",
        "messages": "test_messages",
        "litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
    }
    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:

        ## RESPONSE TAKING TOO LONG
        await slack_alerting.response_taking_too_long_callback(
            kwargs, None, start_time, end_time
        )
        mock_send_alert.assert_awaited_once()

        assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]


# Test for budget crossed
@pytest.mark.asyncio
async def test_budget_alerts_crossed(slack_alerting):

@@ -204,7 +230,9 @@ async def test_send_alert(slack_alerting):
        slack_alerting.async_http_handler, "post", new=AsyncMock()
    ) as mock_post:
        mock_post.return_value.status_code = 200
        await slack_alerting.send_alert("Test message", "Low", "budget_alerts")
        await slack_alerting.send_alert(
            "Test message", "Low", "budget_alerts", alerting_metadata={}
        )
        mock_post.assert_awaited_once()


@@ -263,7 +291,7 @@ async def test_daily_reports_completion(slack_alerting):
        await asyncio.sleep(3)
        response_val = await slack_alerting.send_daily_reports(router=router)

        assert response_val == True
        assert response_val is True

        mock_send_alert.assert_awaited_once()

@@ -288,7 +316,7 @@ async def test_daily_reports_completion(slack_alerting):
        await asyncio.sleep(3)
        response_val = await slack_alerting.send_daily_reports(router=router)

        assert response_val == True
        assert response_val is True

        mock_send_alert.assert_awaited()

@@ -743,3 +771,37 @@ async def test_region_outage_alerting_called(
        mock_send_alert.assert_called_once()
    else:
        mock_send_alert.assert_not_called()


@pytest.mark.asyncio
@pytest.mark.skip(reason="test only needs to run locally")
async def test_alerting():
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "bad_key",
                },
            }
        ],
        debug_level="DEBUG",
        set_verbose=True,
        alerting_config=AlertingConfig(
            alerting_threshold=10,  # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
            webhook_url=os.getenv(
                "SLACK_WEBHOOK_URL"
            ),  # webhook you want to send alerts to
        ),
    )
    try:
        await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )

    except Exception:
        pass
    finally:
        await asyncio.sleep(3)

@@ -29,19 +29,22 @@ import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import (
    new_user,
    generate_key_fn,
    user_api_key_auth,
    user_update,
    user_info,
    block_user,
)
from litellm.proxy.management_endpoints.key_management_endpoints import (
    delete_key_fn,
    info_key_fn,
    update_key_fn,
    generate_key_fn,
    generate_key_helper_fn,
)
from litellm.proxy.spend_reporting_endpoints.spend_management_endpoints import (
    spend_user_fn,
    spend_key_fn,
    view_spend_logs,
    user_info,
    block_user,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
from litellm._logging import verbose_proxy_logger

Some files were not shown because too many files have changed in this diff.