Merge branch 'BerriAI:main' into main

sajda 2025-04-02 19:56:53 +05:30 committed by GitHub
commit 75f41a2d64
134 changed files with 3935 additions and 1451 deletions


@@ -1450,7 +1450,7 @@ jobs:
command: |
pwd
ls
- python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
+ python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/spend_tracking_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
no_output_timeout: 120m
# Store test results
@@ -1743,6 +1743,96 @@ jobs:
# Store test results
- store_test_results:
path: test-results
proxy_spend_accuracy_tests:
machine:
image: ubuntu-2204:2023.10.1
resource_class: xlarge
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
- run:
name: Install Python 3.9
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda init bash
source ~/.bashrc
conda create -n myenv python=3.9 -y
conda activate myenv
python --version
- run:
name: Install Dependencies
command: |
pip install "pytest==7.3.1"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
- run:
name: Build Docker image
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
- run:
name: Run Docker container
# intentionally give bad redis credentials here
# the OTEL test - should get this as a trace
command: |
docker run -d \
-p 4000:4000 \
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e REDIS_HOST=$REDIS_HOST \
-e REDIS_PASSWORD=$REDIS_PASSWORD \
-e REDIS_PORT=$REDIS_PORT \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e USE_DDTRACE=True \
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/spend_tracking_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
- run:
name: Install curl and dockerize
command: |
sudo apt-get update
sudo apt-get install -y curl
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
- run:
name: Start outputting logs
command: docker logs -f my-app
background: true
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 5m
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/spend_tracking_tests -x --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
# Clean up first container
- run:
name: Stop and remove first container
command: |
docker stop my-app
docker rm my-app
proxy_multi_instance_tests:
machine:
@@ -2553,6 +2643,12 @@ workflows:
only:
- main
- /litellm_.*/
- proxy_spend_accuracy_tests:
filters:
branches:
only:
- main
- /litellm_.*/
- proxy_multi_instance_tests:
filters:
branches:
@@ -2714,6 +2810,7 @@ workflows:
- installing_litellm_on_python
- installing_litellm_on_python_3_13
- proxy_logging_guardrails_model_info_tests
- proxy_spend_accuracy_tests
- proxy_multi_instance_tests
- proxy_store_model_in_db_tests
- proxy_build_from_pip_tests


@@ -24,10 +24,10 @@ jobs:
run: |
poetry install --with dev
- - name: Run Black formatting check
+ - name: Run Black formatting
run: |
cd litellm
- poetry run black . --check
+ poetry run black .
cd ..
- name: Run Ruff linting


@@ -1,2 +1,11 @@
python3 -m build
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
Note: You might need to make a MANIFEST.ini file at the root for the build process in case it fails
Place this in MANIFEST.ini
recursive-exclude venv *
recursive-exclude myenv *
recursive-exclude py313_env *
recursive-exclude **/.venv *


@@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
# /v1/messages [BETA]
- LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
- This currently just supports the Anthropic API.
+ Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
@@ -21,9 +22,61 @@ Planned improvement:
- Bedrock Anthropic support
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
<Tabs>
<TabItem label="PROXY" value="proxy">
1. Setup config.yaml
@@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
3. Test it!
- ```bash
+ <Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
@@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
"messages": [
{
"role": "user",
- "content": [
-   {
-     "type": "text",
-     "text": "List 5 important events in the XIX century"
-   }
- ]
+ "content": "Hello, can you tell me a short joke?"
}
],
- "max_tokens": 4096
+ "max_tokens": 100
}'
```
</TabItem>
<TabItem value="sdk" label="SDK">
```python
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
import asyncio
import os
# set env
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
# Call the handler
async def call():
response = await anthropic_messages(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",
max_tokens=100,
)
asyncio.run(call())
```
</TabItem>
</Tabs>
## Request Format
---
Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**
#### Example request body
```json
{
"model": "claude-3-7-sonnet-20250219",
"max_tokens": 1024,
"messages": [
{
"role": "user",
"content": "Hello, world"
}
]
}
```
#### Required Fields
- **model** (string):
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
The maximum number of tokens to generate before stopping.
_Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
An ordered list of conversational turns.
Each message object must include:
- **role** (enum: `"user"` or `"assistant"`):
Specifies the speaker of the message.
- **content** (string or array of content blocks):
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
_Example equivalence:_
```json
{"role": "user", "content": "Hello, Claude"}
```
is equivalent to:
```json
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
```
#### Optional Fields
- **metadata** (object):
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
Indicates whether to stream the response using server-sent events.
- **system** (string or array):
A system prompt providing context or specific instructions to the model.
- **temperature** (number):
Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
Configuration for enabling extended thinking. If enabled, it includes:
- **budget_tokens** (integer):
Minimum of 1024 tokens (and less than `max_tokens`).
- **type** (enum):
E.g., `"enabled"`.
- **tool_choice** (object):
Instructs how the model should utilize any provided tools.
- **tools** (array of objects):
Definitions for tools available to the model. Each tool includes:
- **name** (string):
The tool's name.
- **description** (string):
A detailed description of the tool.
- **input_schema** (object):
A JSON schema describing the expected input format for the tool.
- **top_k** (integer):
Limits sampling to the top K options.
- **top_p** (number):
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
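For illustration only, here is a minimal, non-authoritative sketch that combines the required fields with a few of the optional fields listed above, using the `litellm.anthropic.messages.acreate` interface shown earlier (the model name and all values are placeholders, not recommendations):

```python
import asyncio
import litellm

async def main():
    # Sketch: required fields plus a few optional ones from the list above.
    response = await litellm.anthropic.messages.acreate(
        model="anthropic/claude-3-haiku-20240307",
        max_tokens=256,
        messages=[{"role": "user", "content": "Give me one fun fact about the 19th century."}],
        system="You are a concise assistant.",   # optional system prompt
        temperature=0.5,                         # 0 < temperature < 1
        stop_sequences=["\n\nHuman:"],           # optional custom stop sequences
        metadata={"user_id": "user-123"},        # opaque identifier for the request
    )
    print(response)

asyncio.run(main())
```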
## Response Format
---
Responses will be in the Anthropic messages API format.
#### Example Response
```json
{
"content": [
{
"text": "Hi! My name is Claude.",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Response fields
- **content** (array of objects):
Contains the generated content blocks from the model. Each block includes:
- **type** (string):
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
- **text** (string):
The generated text from the model.
_Note: Maximum length is 5,000,000 characters._
- **citations** (array of objects or `null`):
Optional field providing citation details. Each citation includes:
- **cited_text** (string):
The excerpt being cited.
- **document_index** (integer):
An index referencing the cited document.
- **document_title** (string or `null`):
The title of the cited document.
- **start_char_index** (integer):
The starting character index for the citation.
- **end_char_index** (integer):
The ending character index for the citation.
- **type** (string):
Typically `"char_location"`.
- **id** (string):
A unique identifier for the response message.
_Note: The format and length of IDs may change over time._
- **model** (string):
Specifies the model that generated the response.
- **role** (string):
Indicates the role of the generated message. For responses, this is always `"assistant"`.
- **stop_reason** (string):
Explains why the model stopped generating text. Possible values include:
- `"end_turn"`: The model reached a natural stopping point.
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
- `"stop_sequence"`: A custom stop sequence was encountered.
- `"tool_use"`: The model invoked one or more tools.
- **stop_sequence** (string or `null`):
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
- **type** (string):
Denotes the type of response object, which is always `"message"`.
- **usage** (object):
Provides details on token usage for billing and rate limiting. This includes:
- **input_tokens** (integer):
Total number of input tokens processed.
- **output_tokens** (integer):
Total number of output tokens generated.
- **cache_creation_input_tokens** (integer or `null`):
Number of tokens used to create a cache entry.
- **cache_read_input_tokens** (integer or `null`):
Number of tokens read from the cache.
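As a rough illustration of how these fields might be consumed, the sketch below treats the response as a plain dict shaped like the example JSON above and inspects `stop_reason` and `usage` (the field names come from the list above; the dict-style access is an assumption for the example, not part of the spec):

```python
# Illustrative helper: summarize a /v1/messages response dict.
def summarize_response(response: dict) -> str:
    # Collect the generated text blocks.
    text = " ".join(
        block["text"]
        for block in response.get("content", [])
        if block.get("type") == "text"
    )

    # Explain why generation stopped, per the stop_reason values above.
    stop_reason = response.get("stop_reason")
    if stop_reason == "max_tokens":
        note = "truncated at max_tokens"
    elif stop_reason == "tool_use":
        note = "model requested a tool call"
    elif stop_reason == "stop_sequence":
        note = f"hit stop sequence {response.get('stop_sequence')!r}"
    else:
        note = "stopped naturally (end_turn)"

    usage = response.get("usage", {})
    return (
        f"{text}\n[{note}; input_tokens={usage.get('input_tokens')}, "
        f"output_tokens={usage.get('output_tokens')}]"
    )
```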


@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
- [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
+ [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
:::info


@@ -1,3 +1,5 @@
import Image from '@theme/IdealImage';
# Enterprise
For companies that need SSO, user management and professional support for LiteLLM Proxy
@@ -7,6 +9,8 @@ Get free 7-day trial key [here](https://www.litellm.ai/#trial)
Includes all enterprise features.
<Image img={require('../img/enterprise_vs_oss.png')} />
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)


@@ -1035,8 +1035,10 @@ response = completion(
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
- "type": "image_url",
- "image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
},
],
}
@@ -1081,8 +1083,10 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
- "type": "image_url",
- "image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
}
}
]


@@ -1168,14 +1168,22 @@ os.environ["AWS_REGION_NAME"] = ""
# pdf url
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
# Download the file
response = requests.get(url)
file_data = response.content
encoded_file = base64.b64encode(file_data).decode("utf-8")
# model
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
image_content = [
{"type": "text", "text": "What's this file about?"},
{
- "type": "image_url",
- "image_url": image_url, # OR {"url": image_url}
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
},
]
@@ -1221,8 +1229,10 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
"messages": [
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
{
- "type": "image_url",
- "image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
+ "type": "file",
+ "file": {
+   "file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
+ }
}
]
}'


@@ -365,7 +365,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
</Tabs>
## Specifying Safety Settings
- In certain use-cases you may need to make calls to the models and pass [safety settigns](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
+ In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
```python
response = completion(


@@ -82,7 +82,7 @@ from litellm import completion
os.environ["XAI_API_KEY"] = "your-api-key"
response = completion(
- model="xai/grok-2-latest",
+ model="xai/grok-2-vision-latest",
messages=[
{
"role": "user",


@@ -23,6 +23,12 @@ In the newly created guard's page, you can find a reference to the prompt policy
You can decide which detections will be enabled, and set the threshold for each detection.
:::info
When using LiteLLM with virtual keys, key-specific policies can be set directly in Aim's guards page by specifying the virtual key alias when creating the guard.
Only the aliases of your virtual keys (and not the actual key secrets) will be sent to Aim.
:::
### 3. Add Aim Guardrail on your LiteLLM config.yaml
Define your guardrails under the `guardrails` section


@@ -17,6 +17,14 @@ model_list:
api_key: os.environ/OPENAI_API_KEY
guardrails:
- guardrail_name: general-guard
litellm_params:
guardrail: aim
mode: [pre_call, post_call]
api_key: os.environ/AIM_API_KEY
api_base: os.environ/AIM_API_BASE
default_on: true # Optional
- guardrail_name: "aporia-pre-guard" - guardrail_name: "aporia-pre-guard"
litellm_params: litellm_params:
guardrail: aporia # supported values: "aporia", "lakera" guardrail: aporia # supported values: "aporia", "lakera"
@ -45,6 +53,7 @@ guardrails:
- `pre_call` Run **before** LLM call, on **input** - `pre_call` Run **before** LLM call, on **input**
- `post_call` Run **after** LLM call, on **input & output** - `post_call` Run **after** LLM call, on **input & output**
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes - `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`
## 2. Start LiteLLM Gateway

Binary file not shown (new image, 61 KiB).


@@ -137,6 +137,7 @@ const sidebars = {
label: "[Beta] Guardrails",
items: [
"proxy/guardrails/quick_start",
...[
"proxy/guardrails/aim_security", "proxy/guardrails/aim_security",
"proxy/guardrails/aporia_api", "proxy/guardrails/aporia_api",
"proxy/guardrails/bedrock", "proxy/guardrails/bedrock",
@ -145,7 +146,8 @@ const sidebars = {
"proxy/guardrails/pii_masking_v2", "proxy/guardrails/pii_masking_v2",
"proxy/guardrails/secret_detection", "proxy/guardrails/secret_detection",
"proxy/guardrails/custom_guardrail", "proxy/guardrails/custom_guardrail",
"prompt_injection" "proxy/guardrails/prompt_injection",
].sort(),
], ],
}, },
{ {

Binary file not shown.


@@ -0,0 +1,4 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "failed_requests" INTEGER NOT NULL DEFAULT 0,
ADD COLUMN "successful_requests" INTEGER NOT NULL DEFAULT 0;

litellm-proxy-extras/poetry.lock (generated, new file)

@@ -0,0 +1,7 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
package = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "2cf39473e67ff0615f0a61c9d2ac9f02b38cc08cbb1bdb893d89bee002646623"


@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
- version = "0.1.1"
+ version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
- version = "0.1.1"
+ version = "0.1.2"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",


@@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []


@@ -3,4 +3,4 @@ import importlib_metadata
try:
version = importlib_metadata.version("litellm")
except Exception:
- pass
+ version = "unknown"


@@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]


@@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API
Use this to call LLMs in Anthropic /messages Request/Response format
This is an __init__.py file to allow the following interface
- litellm.messages.acreate
- litellm.messages.create
"""
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
async def acreate(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
return await _async_anthropic_messages(
max_tokens=max_tokens,
messages=messages,
model=model,
metadata=metadata,
stop_sequences=stop_sequences,
stream=stream,
system=system,
temperature=temperature,
thinking=thinking,
tool_choice=tool_choice,
tools=tools,
top_k=top_k,
top_p=top_p,
**kwargs,
)
async def create(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
raise NotImplementedError("This function is not implemented")


@@ -0,0 +1,116 @@
## Use LLM API endpoints in Anthropic Interface
Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
1. Setup config.yaml
```yaml
model_list:
- model_name: anthropic-claude
litellm_params:
model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
"model": "anthropic-claude",
"messages": [
{
"role": "user",
"content": "Hello, can you tell me a short joke?"
}
],
"max_tokens": 100
}'
```


@@ -19,6 +19,7 @@ DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.


@@ -550,6 +550,7 @@ def completion_cost( # noqa: PLR0915
custom_pricing: Optional[bool] = None,
base_model: Optional[str] = None,
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.
@@ -602,7 +603,7 @@
completion_response=completion_response
)
rerank_billed_units: Optional[RerankBilledUnits] = None
- model = _select_model_name_for_cost_calc(
+ selected_model = _select_model_name_for_cost_calc(
model=model,
completion_response=completion_response,
custom_llm_provider=custom_llm_provider,
@@ -610,16 +611,24 @@
base_model=base_model,
)
- verbose_logger.info(f"selected model name for cost calculation: {model}")
+ potential_model_names = [selected_model]
if model is not None:
potential_model_names.append(model)
for idx, model in enumerate(potential_model_names):
try:
verbose_logger.info(
f"selected model name for cost calculation: {model}"
)
if completion_response is not None and (
isinstance(completion_response, BaseModel)
or isinstance(completion_response, dict)
): # tts returns a custom class
if isinstance(completion_response, dict):
- usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
-   "usage", {}
- )
+ usage_obj: Optional[
+   Union[dict, Usage]
+ ] = completion_response.get("usage", {})
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
@@ -638,16 +647,16 @@
_usage = usage_obj
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
- _usage = (
-   ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+ _usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
_usage
).model_dump()
- )
# get input/output tokens from completion_response
prompt_tokens = _usage.get("prompt_tokens", 0)
completion_tokens = _usage.get("completion_tokens", 0)
- cache_creation_input_tokens = _usage.get("cache_creation_input_tokens", 0)
+ cache_creation_input_tokens = _usage.get(
+   "cache_creation_input_tokens", 0
+ )
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
if (
"prompt_tokens_details" in _usage
@@ -655,7 +664,9 @@
and _usage["prompt_tokens_details"]
):
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
- cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
+ cache_read_input_tokens = prompt_tokens_details.get(
+   "cached_tokens", 0
+ )
total_time = getattr(completion_response, "_response_ms", 0)
@@ -703,7 +714,8 @@
if (
call_type == CallTypes.image_generation.value
or call_type == CallTypes.aimage_generation.value
- or call_type == PassthroughCallTypes.passthrough_image_generation.value
+ or call_type
+ == PassthroughCallTypes.passthrough_image_generation.value
):
### IMAGE GENERATION COST CALCULATION ###
if custom_llm_provider == "vertex_ai":
@@ -733,7 +745,8 @@
optional_params=optional_params,
)
elif (
- call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+ call_type == CallTypes.speech.value
+ or call_type == CallTypes.aspeech.value
):
prompt_characters = litellm.utils._count_characters(text=prompt)
elif (
@@ -744,7 +757,8 @@
completion_response, "duration", 0.0
)
elif (
- call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
+ call_type == CallTypes.rerank.value
+ or call_type == CallTypes.arerank.value
):
if completion_response is not None and isinstance(
completion_response, RerankResponse
@@ -773,7 +787,9 @@
# together ai prices based on size of llm
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
- model = get_model_params_and_category(model, call_type=CallTypes(call_type))
+ model = get_model_params_and_category(
+   model, call_type=CallTypes(call_type)
+ )
# replicate llms are calculate based on time for request running
# see https://replicate.com/pricing
@@ -788,14 +804,19 @@
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
- if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
+ if (
+   custom_llm_provider is not None
+   and custom_llm_provider == "vertex_ai"
+ ):
# Calculate the prompt characters + response characters
if len(messages) > 0:
prompt_string = litellm.utils.get_formatted_prompt(
data={"messages": messages}, call_type="completion"
)
- prompt_characters = litellm.utils._count_characters(text=prompt_string)
+ prompt_characters = litellm.utils._count_characters(
+   text=prompt_string
+ )
if completion_response is not None and isinstance(
completion_response, ModelResponse
):
@@ -823,19 +844,35 @@
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
usage_object=cost_per_token_usage_object,
- call_type=call_type,
+ call_type=cast(CallTypesLiteral, call_type),
audio_transcription_file_duration=audio_transcription_file_duration,
rerank_billed_units=rerank_billed_units,
)
- _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
- _final_cost += StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
+ _final_cost = (
+   prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+ )
+ _final_cost += (
+   StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
model=model,
response_object=completion_response,
standard_built_in_tools_params=standard_built_in_tools_params,
custom_llm_provider=custom_llm_provider,
)
+ )
return _final_cost
except Exception as e:
verbose_logger.debug(
"litellm.cost_calculator.py::completion_cost() - Error calculating cost for model={} - {}".format(
model, str(e)
)
)
if idx == len(potential_model_names) - 1:
raise e
raise Exception(
"Unable to calculat cost for received potential model names - {}".format(
potential_model_names
)
)
except Exception as e:
raise e
@@ -897,6 +934,7 @@ def response_cost_calculator(
custom_pricing: Optional[bool] = None,
prompt: str = "",
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Returns


@@ -290,6 +290,7 @@ class Logging(LiteLLMLoggingBaseClass):
"input": _input,
"litellm_params": litellm_params,
"applied_guardrails": applied_guardrails,
"model": model,
}
def process_dynamic_callbacks(self):
@@ -892,6 +893,7 @@
ResponseCompletedEvent,
],
cache_hit: Optional[bool] = None,
litellm_model_name: Optional[str] = None,
) -> Optional[float]:
"""
Calculate response cost using result + logging object variables.
@@ -917,7 +919,7 @@
try:
response_cost_calculator_kwargs = {
"response_object": result,
- "model": self.model,
+ "model": litellm_model_name or self.model,
"cache_hit": cache_hit,
"custom_llm_provider": self.model_call_details.get(
"custom_llm_provider", None
@@ -1009,6 +1011,10 @@
return False
return True
def _update_completion_start_time(self, completion_start_time: datetime.datetime):
self.completion_start_time = completion_start_time
self.model_call_details["completion_start_time"] = self.completion_start_time
def _success_handler_helper_fn(
self,
result=None,


@@ -22,6 +22,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionFileObject,
ChatCompletionFunctionMessage,
ChatCompletionImageObject,
ChatCompletionTextObject,
@@ -1455,6 +1456,25 @@ def anthropic_messages_pt( # noqa: PLR0915
user_content.append(_content_element)
elif m.get("type", "") == "document":
user_content.append(cast(AnthropicMessagesDocumentParam, m))
elif m.get("type", "") == "file":
file_message = cast(ChatCompletionFileObject, m)
file_data = file_message["file"].get("file_data")
if file_data:
image_chunk = convert_to_anthropic_image_obj(
openai_image_url=file_data,
format=file_message["file"].get("format"),
)
anthropic_document_param = (
AnthropicMessagesDocumentParam(
type="document",
source=AnthropicContentParamSource(
type="base64",
media_type=image_chunk["media_type"],
data=image_chunk["data"],
),
)
)
user_content.append(anthropic_document_param)
elif isinstance(user_message_types_block["content"], str):
_anthropic_content_text_element: AnthropicMessagesTextParam = {
"type": "text",
@@ -2885,6 +2905,11 @@ class BedrockConverseMessagesProcessor:
image_url=image_url, format=format
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = await BedrockConverseMessagesProcessor._async_process_file_message(
message=cast(ChatCompletionFileObject, element)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(
@@ -3054,6 +3079,45 @@
reasoning_content_blocks.append(bedrock_content_block)
return reasoning_content_blocks
@staticmethod
def _process_file_message(message: ChatCompletionFileObject) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
format = file_message.get("format")
return BedrockImageProcessor.process_image_sync(
image_url=cast(str, file_id or file_data), format=format
)
@staticmethod
async def _async_process_file_message(
message: ChatCompletionFileObject,
) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
format = file_message.get("format")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
return await BedrockImageProcessor.process_image_async(
image_url=cast(str, file_id or file_data), format=format
)
def _bedrock_converse_messages_pt( # noqa: PLR0915
messages: List,
@@ -3126,6 +3190,13 @@
format=format,
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = (
BedrockConverseMessagesProcessor._process_file_message(
message=cast(ChatCompletionFileObject, element)
)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(


@@ -1,5 +1,6 @@
import asyncio
import collections.abc
import datetime
import json
import threading
import time
@@ -1567,6 +1568,10 @@ class CustomStreamWrapper:
if response is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
## LOGGING
executor.submit(
self.run_success_logging_and_cache_storage,
@@ -1721,6 +1726,11 @@
if processed_chunk is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
choice = processed_chunk.choices[0]
if isinstance(choice, StreamingChoices):
self.response_uptil_now += choice.delta.get("content", "") or ""


@@ -18,8 +18,10 @@ from litellm.types.llms.anthropic import (
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionSystemMessage,
@@ -94,6 +96,7 @@ class AnthropicConfig(BaseConfig):
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]
if "claude-3-7-sonnet" in model:
@@ -141,15 +144,9 @@
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
- # Handle beta headers for Vertex AI
- # We allow prompt caching beta header for Vertex, but exclude other beta headers that might cause issues
+ # Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
- vertex_safe_betas = set()
+ pass
- # Allow prompt caching beta header for Vertex
- if "prompt-caching-2024-07-31" in betas:
-   vertex_safe_betas.add("prompt-caching-2024-07-31")
- if len(vertex_safe_betas) > 0:
-   headers["anthropic-beta"] = ",".join(vertex_safe_betas)
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
@@ -297,6 +294,21 @@
new_stop = new_v
return new_stop
@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
elif reasoning_effort == "high":
return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def map_openai_params(
self,
non_default_params: dict,
@@ -308,10 +320,6 @@
non_default_params=non_default_params
)
- ## handle thinking tokens
- self.update_optional_params_with_thinking_tokens(
-   non_default_params=non_default_params, optional_params=optional_params
- )
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
@@ -376,7 +384,15 @@
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params
def _create_json_tool_call_for_response_format(


@@ -6,7 +6,7 @@
"""
import json
- from typing import Any, AsyncIterator, Dict, Optional, Union, cast
+ from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
@client
async def anthropic_messages(
- api_key: str,
+ max_tokens: int,
+ messages: List[Dict],
model: str,
- stream: bool = False,
+ metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = None,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
- ) -> Union[Dict[str, Any], AsyncIterator]:
+ ) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
"""
@@ -129,10 +143,8 @@ async def anthropic_messages(
},
custom_llm_provider=_custom_llm_provider,
)
- litellm_logging_obj.model_call_details.update(kwargs)
# Prepare request body
- request_body = kwargs.copy()
+ request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()
@@ -140,10 +152,12 @@
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
litellm_logging_obj.model_call_details.update(request_body)
# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(
@@ -164,7 +178,7 @@
url=request_url,
headers=headers,
data=json.dumps(request_body),
- stream=stream,
+ stream=stream or False,
)
response.raise_for_status()


@@ -104,7 +104,10 @@ class BaseConfig(ABC):
return type_to_response_format_param(response_format=response_format)
def is_thinking_enabled(self, non_default_params: dict) -> bool:
- return non_default_params.get("thinking", {}).get("type", None) == "enabled"
+ return (
+   non_default_params.get("thinking", {}).get("type") == "enabled"
+   or non_default_params.get("reasoning_effort") is not None
+ )
def update_optional_params_with_thinking_tokens(
self, non_default_params: dict, optional_params: dict
@@ -116,9 +119,9 @@
if 'thinking' is enabled and 'max_tokens' is not specified, set 'max_tokens' to the thinking token budget + DEFAULT_MAX_TOKENS
"""
- is_thinking_enabled = self.is_thinking_enabled(non_default_params)
+ is_thinking_enabled = self.is_thinking_enabled(optional_params)
if is_thinking_enabled and "max_tokens" not in non_default_params:
- thinking_token_budget = cast(dict, non_default_params["thinking"]).get(
+ thinking_token_budget = cast(dict, optional_params["thinking"]).get(
"budget_tokens", None
)
if thinking_token_budget is not None:


@@ -17,6 +17,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
_bedrock_converse_messages_pt,
_bedrock_tools_pt,
)
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
@@ -128,6 +129,7 @@ class AmazonConverseConfig(BaseConfig):
"claude-3-7" in model
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
supported_params.append("thinking")
supported_params.append("reasoning_effort")
return supported_params
def map_tool_choice_values(
@@ -218,9 +220,7 @@
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
- self.update_optional_params_with_thinking_tokens(
-   non_default_params=non_default_params, optional_params=optional_params
- )
for param, value in non_default_params.items():
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]
@ -297,6 +297,14 @@ class AmazonConverseConfig(BaseConfig):
optional_params["tool_choice"] = _tool_choice_value optional_params["tool_choice"] = _tool_choice_value
if param == "thinking": if param == "thinking":
optional_params["thinking"] = value optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params return optional_params

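A hedged usage sketch: after this change, `reasoning_effort` should be accepted for Bedrock Claude 3.7 Converse models and converted into a `thinking` block through Anthropic's mapping. The model id and effort level below are illustrative assumptions, not values taken from the diff.

import litellm

# Assumes AWS credentials and region are configured in the environment.
response = litellm.completion(
    model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",  # example id
    messages=[{"role": "user", "content": "Explain the CAP theorem briefly."}],
    reasoning_effort="low",  # mapped to a `thinking` config by AmazonConverseConfig
)
print(response.choices[0].message.content)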

@@ -12,6 +12,7 @@ import httpx

 from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.types.llms.openrouter import OpenRouterErrorMessage
 from litellm.types.utils import ModelResponse, ModelResponseStream

 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -71,6 +72,24 @@ class OpenrouterConfig(OpenAIGPTConfig):
 class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
     def chunk_parser(self, chunk: dict) -> ModelResponseStream:
         try:
+            ## HANDLE ERROR IN CHUNK ##
+            if "error" in chunk:
+                error_chunk = chunk["error"]
+                error_message = OpenRouterErrorMessage(
+                    message="Message: {}, Metadata: {}, User ID: {}".format(
+                        error_chunk["message"],
+                        error_chunk.get("metadata", {}),
+                        error_chunk.get("user_id", ""),
+                    ),
+                    code=error_chunk["code"],
+                    metadata=error_chunk.get("metadata", {}),
+                )
+                raise OpenRouterException(
+                    message=error_message["message"],
+                    status_code=error_message["code"],
+                    headers=error_message["metadata"].get("headers", {}),
+                )
+
             new_choices = []
             for choice in chunk["choices"]:
                 choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")

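For reference, an error chunk in an OpenRouter stream roughly takes the shape below; the new guard surfaces it as an exception instead of failing later on the missing "choices" key. The payload here is a hypothetical example, and RuntimeError stands in for the provider-specific exception used in the real code.

# Hypothetical mid-stream error chunk.
error_chunk = {
    "error": {
        "message": "Provider returned error",
        "code": 429,
        "metadata": {"headers": {"retry-after": "30"}},
    }
}

# Simplified version of the new check in chunk_parser:
if "error" in error_chunk:
    err = error_chunk["error"]
    raise RuntimeError(
        f"OpenRouter stream error {err['code']}: {err['message']}"
    )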

@@ -127,12 +127,17 @@ class AWSEventStreamDecoder:
         async for chunk in iterator:
             event_stream_buffer.add_data(chunk)
             for event in event_stream_buffer:
+                try:
                     message = self._parse_message_from_event(event)
                     if message:
-                        verbose_logger.debug("sagemaker parsed chunk bytes %s", message)
+                        verbose_logger.debug(
+                            "sagemaker parsed chunk bytes %s", message
+                        )
                         # remove data: prefix and "\n\n" at the end
                         message = (
-                            litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
+                            litellm.CustomStreamWrapper._strip_sse_data_from_chunk(
+                                message
+                            )
                             or ""
                         )
                         message = message.replace("\n\n", "")
@@ -141,7 +146,6 @@ class AWSEventStreamDecoder:
                         accumulated_json += message

                         # Try to parse the accumulated JSON
-                        try:
                             _data = json.loads(accumulated_json)
                             if self.is_messages_api:
                                 yield self._chunk_parser_messages_api(chunk_data=_data)
@@ -152,6 +156,16 @@ class AWSEventStreamDecoder:
                         except json.JSONDecodeError:
                             # If it's not valid JSON yet, continue to the next event
                             continue
+                except UnicodeDecodeError as e:
+                    verbose_logger.warning(
+                        f"UnicodeDecodeError: {e}. Attempting to combine with next event."
+                    )
+                    continue
+                except Exception as e:
+                    verbose_logger.error(
+                        f"Error parsing message: {e}. Attempting to combine with next event."
+                    )
+                    continue
         # Handle any remaining data after the iterator is exhausted
         if accumulated_json:
@@ -167,6 +181,8 @@ class AWSEventStreamDecoder:
                     f"Warning: Unparseable JSON data remained: {accumulated_json}"
                 )
                 yield None
+            except Exception as e:
+                verbose_logger.error(f"Final error parsing accumulated JSON: {e}")

     def _parse_message_from_event(self, event) -> Optional[str]:
         response_dict = event.to_response_dict()

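The motivation for tolerating UnicodeDecodeError and partial payloads is that a multi-byte character or a JSON document can be split across two SageMaker event-stream chunks. A minimal standalone illustration of the same accumulate-and-retry pattern, independent of the AWS types:

import json

# Simulated stream where one JSON object is split across two chunks.
chunks = ['{"text": "hel', 'lo"}', '{"text": "world"}']

accumulated = ""
for chunk in chunks:
    accumulated += chunk
    try:
        parsed = json.loads(accumulated)
        print("parsed:", parsed)
        accumulated = ""  # reset once a full JSON object was decoded
    except json.JSONDecodeError:
        # Incomplete payload: keep accumulating and retry on the next chunk.
        continue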

@@ -4453,6 +4453,42 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
         "supports_tool_choice": true
     },
+    "gemini-2.5-pro-exp-03-25": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_audio_input": true,
+        "supports_video_input": true,
+        "supports_pdf_input": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
     "gemini-2.0-pro-exp-02-05": {
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
@@ -10189,6 +10225,22 @@
         "litellm_provider": "voyage",
         "mode": "rerank"
     },
+    "databricks/databricks-claude-3-7-sonnet": {
+        "max_tokens": 200000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000025,
+        "input_dbu_cost_per_token": 0.00003571,
+        "output_cost_per_token": 0.00017857,
+        "output_db_cost_per_token": 0.000214286,
+        "litellm_provider": "databricks",
+        "mode": "chat",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
+        "supports_assistant_prefill": true,
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "databricks/databricks-meta-llama-3-1-405b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
@@ -10217,7 +10269,7 @@
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
     },
-    "databricks/meta-llama-3.3-70b-instruct": {
+    "databricks/databricks-meta-llama-3-3-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,

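Once these entries are in the model cost map, the metadata can be read back through litellm's model-info helper; a quick sketch, where the provider argument and printed fields are assumptions based on the JSON above:

import litellm

info = litellm.get_model_info(
    model="gemini-2.5-pro-exp-03-25", custom_llm_provider="vertex_ai"
)
print(info["max_input_tokens"])  # expected: 1048576, per the new entry
print(info["supports_vision"])   # expected: True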
File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/1f6915676624c422.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[38411,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"274\",\"static/chunks/274-bddaf0cf6c91e72f.js\",\"250\",\"static/chunks/250-dfc03a6fb4f0d254.js\",\"699\",\"static/chunks/699-87224ecba28f1f48.js\",\"931\",\"static/chunks/app/page-0f46d4a8b9bdf1c0.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Yb50LG5p7c9QpG54GIoFV\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/1f6915676624c422.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1] 3:I[38411,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","274","static/chunks/274-bddaf0cf6c91e72f.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","931","static/chunks/app/page-0f46d4a8b9bdf1c0.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1] 3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-601568e45a5ffece.js","461","static/chunks/app/onboarding/page-5110f2c6a3c9a2f4.js"],"default",1] 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-dfc03a6fb4f0d254.js","461","static/chunks/app/onboarding/page-2bf7a26db5342dbf.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@@ -20,14 +20,20 @@ model_list:
     litellm_params:
       model: gemini/gemini-2.0-flash
       api_key: os.environ/GEMINI_API_KEY
+  - model_name: openrouter_model
+    litellm_params:
+      model: openrouter/openrouter_model
+      api_key: os.environ/OPENROUTER_API_KEY
+      api_base: http://0.0.0.0:8090

 litellm_settings:
   num_retries: 0
   callbacks: ["prometheus"]
   # json_logs: true

-# router_settings:
-#   routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
-#   redis_host: os.environ/REDIS_HOST
-#   redis_password: os.environ/REDIS_PASSWORD
-#   redis_port: os.environ/REDIS_PORT
+router_settings:
+  routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT

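Equivalent programmatic setup, for cases where the proxy config file is not used: the router settings enabled above map onto litellm.Router arguments. The model entry and Redis values below are placeholders, not values from this config.

import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gemini-flash",
            "litellm_params": {
                "model": "gemini/gemini-2.0-flash",
                "api_key": os.environ.get("GEMINI_API_KEY"),
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",
    redis_host=os.environ.get("REDIS_HOST"),
    redis_password=os.environ.get("REDIS_PASSWORD"),
    redis_port=int(os.environ.get("REDIS_PORT", "6379")),
)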

@@ -432,6 +432,7 @@ class LiteLLMRoutes(enum.Enum):
         "/model/new",
         "/model/update",
         "/model/delete",
+        "/user/daily/activity",
     ]  # routes that manage their own allowed/disallowed logic

     ## Org Admin Routes ##
@@ -2736,6 +2737,8 @@ class DailyUserSpendTransaction(TypedDict):
     completion_tokens: int
     spend: float
     api_requests: int
+    successful_requests: int
+    failed_requests: int


 class DBSpendUpdateTransactions(TypedDict):
@@ -2749,3 +2752,9 @@ class DBSpendUpdateTransactions(TypedDict):
     team_list_transactions: Optional[Dict[str, float]]
     team_member_list_transactions: Optional[Dict[str, float]]
     org_list_transactions: Optional[Dict[str, float]]
+
+
+class SpendUpdateQueueItem(TypedDict, total=False):
+    entity_type: Litellm_EntityType
+    entity_id: str
+    response_cost: Optional[float]

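A minimal sketch of how SpendUpdateQueueItem entries can be aggregated per entity before being flushed to the database. The queue classes added in this commit are not shown here, so this asyncio.Queue version is an illustrative stand-in rather than their actual implementation:

import asyncio
from collections import defaultdict

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()

    # Queue a few spend updates (dicts shaped like SpendUpdateQueueItem).
    await queue.put({"entity_type": "key", "entity_id": "hashed-token-1", "response_cost": 0.002})
    await queue.put({"entity_type": "key", "entity_id": "hashed-token-1", "response_cost": 0.003})
    await queue.put({"entity_type": "team", "entity_id": "team-a", "response_cost": 0.010})

    # Flush and aggregate: one summed cost per (entity_type, entity_id).
    aggregated: dict = defaultdict(float)
    while not queue.empty():
        item = queue.get_nowait()
        aggregated[(item["entity_type"], item["entity_id"])] += item["response_cost"] or 0.0

    print(dict(aggregated))

asyncio.run(main())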

@@ -1,53 +0,0 @@
"""
Checks for LiteLLM service account keys
"""
from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
async def service_account_checks(
valid_token: UserAPIKeyAuth, request_data: dict
) -> bool:
"""
If a virtual key is a service account, checks it's a valid service account
A token is a service account if it has a service_account_id in its metadata
Service Account Specific Checks:
- Check if required_params is set
"""
if check_if_token_is_service_account(valid_token) is not True:
return True
from litellm.proxy.proxy_server import general_settings
if "service_account_settings" in general_settings:
service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings:
_enforced_params = service_account_settings["enforced_params"]
for param in _enforced_params:
if param not in request_data:
raise ProxyException(
type=ProxyErrorTypes.bad_request_error.value,
code=400,
param=param,
message=f"BadRequest please pass param={param} in request body. This is a required param for service account",
)
return True


@@ -49,7 +49,6 @@ from litellm.proxy.auth.auth_utils import (
 from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler
 from litellm.proxy.auth.oauth2_check import check_oauth2_token
 from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
-from litellm.proxy.auth.service_account_checks import service_account_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.utils import PrismaClient, ProxyLogging
 from litellm.types.services import ServiceTypes
@@ -905,12 +904,6 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
             else:
                 _team_obj = None

-            # Check 7: Check if key is a service account key
-            await service_account_checks(
-                valid_token=valid_token,
-                request_data=request_data,
-            )
-
             user_api_key_cache.set_cache(
                 key=valid_token.team_id, value=_team_obj
             )  # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py


@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
         verbose_proxy_logger.debug(
             "Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
         )


@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:

 def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
     if request is None:
         return None
-    if hasattr(request, "scope") and "parsed_body" in request.scope:
-        return request.scope["parsed_body"]
+    if (
+        hasattr(request, "scope")
+        and "parsed_body" in request.scope
+        and isinstance(request.scope["parsed_body"], tuple)
+    ):
+        accepted_keys, parsed_body = request.scope["parsed_body"]
+        return {key: parsed_body[key] for key in accepted_keys}
     return None

@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
     try:
         if request is None:
             return
-        request.scope["parsed_body"] = parsed_body
+        request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
     except Exception as e:
         verbose_proxy_logger.debug(
             "Unexpected error setting request parsed body - {}".format(e)

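The new storage format keeps the accepted keys alongside the parsed dict, so a value of the wrong shape in request.scope is ignored instead of being returned blindly. A standalone round-trip of the same idea, with a plain dict standing in for request.scope:

scope: dict = {}

parsed_body = {"model": "gpt-4o", "stream": True}

# _safe_set_request_parsed_body equivalent: store (accepted_keys, body) as a tuple.
scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)

# _safe_get_request_parsed_body equivalent: only rebuild when the tuple shape matches.
stored = scope.get("parsed_body")
if isinstance(stored, tuple):
    accepted_keys, body = stored
    print({key: body[key] for key in accepted_keys})  # {'model': 'gpt-4o', 'stream': True}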

@@ -10,7 +10,7 @@ import os
 import time
 import traceback
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
@@ -18,13 +18,19 @@ from litellm.caching import DualCache, RedisCache
 from litellm.constants import DB_SPEND_UPDATE_JOB_NAME
 from litellm.proxy._types import (
     DB_CONNECTION_ERROR_TYPES,
+    DailyUserSpendTransaction,
     DBSpendUpdateTransactions,
     Litellm_EntityType,
     LiteLLM_UserTable,
     SpendLogsPayload,
+    SpendUpdateQueueItem,
 )
-from litellm.proxy.db.pod_lock_manager import PodLockManager
-from litellm.proxy.db.redis_update_buffer import RedisUpdateBuffer
+from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
+    DailySpendUpdateQueue,
+)
+from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
+from litellm.proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer
+from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue

 if TYPE_CHECKING:
     from litellm.proxy.utils import PrismaClient, ProxyLogging
@@ -48,10 +54,12 @@ class DBSpendUpdateWriter:
         self.redis_cache = redis_cache
         self.redis_update_buffer = RedisUpdateBuffer(redis_cache=self.redis_cache)
         self.pod_lock_manager = PodLockManager(cronjob_id=DB_SPEND_UPDATE_JOB_NAME)
+        self.spend_update_queue = SpendUpdateQueue()
+        self.daily_spend_update_queue = DailySpendUpdateQueue()

-    @staticmethod
     async def update_database(
+        self,
         # LiteLLM management object fields
         token: Optional[str],
         user_id: Optional[str],
         end_user_id: Optional[str],
@ -84,7 +92,7 @@ class DBSpendUpdateWriter:
hashed_token = token hashed_token = token
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_user_db( self._update_user_db(
response_cost=response_cost, response_cost=response_cost,
user_id=user_id, user_id=user_id,
prisma_client=prisma_client, prisma_client=prisma_client,
@ -94,14 +102,14 @@ class DBSpendUpdateWriter:
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_key_db( self._update_key_db(
response_cost=response_cost, response_cost=response_cost,
hashed_token=hashed_token, hashed_token=hashed_token,
prisma_client=prisma_client, prisma_client=prisma_client,
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_team_db( self._update_team_db(
response_cost=response_cost, response_cost=response_cost,
team_id=team_id, team_id=team_id,
user_id=user_id, user_id=user_id,
@ -109,14 +117,14 @@ class DBSpendUpdateWriter:
) )
) )
asyncio.create_task( asyncio.create_task(
DBSpendUpdateWriter._update_org_db( self._update_org_db(
response_cost=response_cost, response_cost=response_cost,
org_id=org_id, org_id=org_id,
prisma_client=prisma_client, prisma_client=prisma_client,
) )
) )
if disable_spend_logs is False: if disable_spend_logs is False:
await DBSpendUpdateWriter._insert_spend_log_to_db( await self._insert_spend_log_to_db(
kwargs=kwargs, kwargs=kwargs,
completion_response=completion_response, completion_response=completion_response,
start_time=start_time, start_time=start_time,
@ -135,56 +143,8 @@ class DBSpendUpdateWriter:
f"Error updating Prisma database: {traceback.format_exc()}" f"Error updating Prisma database: {traceback.format_exc()}"
) )
@staticmethod
async def _update_transaction_list(
response_cost: Optional[float],
entity_id: Optional[str],
transaction_list: dict,
entity_type: Litellm_EntityType,
debug_msg: Optional[str] = None,
prisma_client: Optional[PrismaClient] = None,
) -> bool:
"""
Common helper method to update a transaction list for an entity
Args:
response_cost: The cost to add
entity_id: The ID of the entity to update
transaction_list: The transaction list dictionary to update
entity_type: The type of entity (from EntityType enum)
debug_msg: Optional custom debug message
Returns:
bool: True if update happened, False otherwise
"""
try:
if debug_msg:
verbose_proxy_logger.debug(debug_msg)
else:
verbose_proxy_logger.debug(
f"adding spend to {entity_type.value} db. Response cost: {response_cost}. {entity_type.value}_id: {entity_id}."
)
if prisma_client is None:
return False
if entity_id is None:
verbose_proxy_logger.debug(
f"track_cost_callback: {entity_type.value}_id is None. Not tracking spend for {entity_type.value}"
)
return False
transaction_list[entity_id] = response_cost + transaction_list.get(
entity_id, 0
)
return True
except Exception as e:
verbose_proxy_logger.info(
f"Update {entity_type.value.capitalize()} DB failed to execute - {str(e)}\n{traceback.format_exc()}"
)
raise e
@staticmethod
async def _update_key_db( async def _update_key_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
hashed_token: Optional[str], hashed_token: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -193,13 +153,12 @@ class DBSpendUpdateWriter:
if hashed_token is None or prisma_client is None: if hashed_token is None or prisma_client is None:
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=hashed_token,
transaction_list=prisma_client.key_list_transactions,
entity_type=Litellm_EntityType.KEY, entity_type=Litellm_EntityType.KEY,
debug_msg=f"adding spend to key db. Response cost: {response_cost}. Token: {hashed_token}.", entity_id=hashed_token,
prisma_client=prisma_client, response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.exception( verbose_proxy_logger.exception(
@ -207,8 +166,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _update_user_db( async def _update_user_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
user_id: Optional[str], user_id: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -234,21 +193,21 @@ class DBSpendUpdateWriter:
for _id in user_ids: for _id in user_ids:
if _id is not None: if _id is not None:
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=_id,
transaction_list=prisma_client.user_list_transactions,
entity_type=Litellm_EntityType.USER, entity_type=Litellm_EntityType.USER,
prisma_client=prisma_client, entity_id=_id,
response_cost=response_cost,
)
) )
if end_user_id is not None: if end_user_id is not None:
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=end_user_id,
transaction_list=prisma_client.end_user_list_transactions,
entity_type=Litellm_EntityType.END_USER, entity_type=Litellm_EntityType.END_USER,
prisma_client=prisma_client, entity_id=end_user_id,
response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.info( verbose_proxy_logger.info(
@ -256,8 +215,8 @@ class DBSpendUpdateWriter:
+ f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}" + f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}"
) )
@staticmethod
async def _update_team_db( async def _update_team_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
team_id: Optional[str], team_id: Optional[str],
user_id: Optional[str], user_id: Optional[str],
@ -270,12 +229,12 @@ class DBSpendUpdateWriter:
) )
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=team_id,
transaction_list=prisma_client.team_list_transactions,
entity_type=Litellm_EntityType.TEAM, entity_type=Litellm_EntityType.TEAM,
prisma_client=prisma_client, entity_id=team_id,
response_cost=response_cost,
)
) )
try: try:
@ -283,12 +242,12 @@ class DBSpendUpdateWriter:
if user_id is not None: if user_id is not None:
# key is "team_id::<value>::user_id::<value>" # key is "team_id::<value>::user_id::<value>"
team_member_key = f"team_id::{team_id}::user_id::{user_id}" team_member_key = f"team_id::{team_id}::user_id::{user_id}"
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=team_member_key,
transaction_list=prisma_client.team_member_list_transactions,
entity_type=Litellm_EntityType.TEAM_MEMBER, entity_type=Litellm_EntityType.TEAM_MEMBER,
prisma_client=prisma_client, entity_id=team_member_key,
response_cost=response_cost,
)
) )
except Exception: except Exception:
pass pass
@ -298,8 +257,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _update_org_db( async def _update_org_db(
self,
response_cost: Optional[float], response_cost: Optional[float],
org_id: Optional[str], org_id: Optional[str],
prisma_client: Optional[PrismaClient], prisma_client: Optional[PrismaClient],
@ -311,12 +270,12 @@ class DBSpendUpdateWriter:
) )
return return
await DBSpendUpdateWriter._update_transaction_list( await self.spend_update_queue.add_update(
response_cost=response_cost, update=SpendUpdateQueueItem(
entity_id=org_id,
transaction_list=prisma_client.org_list_transactions,
entity_type=Litellm_EntityType.ORGANIZATION, entity_type=Litellm_EntityType.ORGANIZATION,
prisma_client=prisma_client, entity_id=org_id,
response_cost=response_cost,
)
) )
except Exception as e: except Exception as e:
verbose_proxy_logger.info( verbose_proxy_logger.info(
@ -324,8 +283,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod
async def _insert_spend_log_to_db( async def _insert_spend_log_to_db(
self,
kwargs: Optional[dict], kwargs: Optional[dict],
completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]], completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]],
start_time: Optional[datetime], start_time: Optional[datetime],
@ -346,7 +305,7 @@ class DBSpendUpdateWriter:
end_time=end_time, end_time=end_time,
) )
payload["spend"] = response_cost or 0.0 payload["spend"] = response_cost or 0.0
DBSpendUpdateWriter._set_spend_logs_payload( await self._set_spend_logs_payload(
payload=payload, payload=payload,
spend_logs_url=os.getenv("SPEND_LOGS_URL"), spend_logs_url=os.getenv("SPEND_LOGS_URL"),
prisma_client=prisma_client, prisma_client=prisma_client,
@ -357,8 +316,8 @@ class DBSpendUpdateWriter:
) )
raise e raise e
@staticmethod async def _set_spend_logs_payload(
def _set_spend_logs_payload( self,
payload: Union[dict, SpendLogsPayload], payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient, prisma_client: PrismaClient,
spend_logs_url: Optional[str] = None, spend_logs_url: Optional[str] = None,
@ -377,8 +336,9 @@ class DBSpendUpdateWriter:
elif prisma_client is not None: elif prisma_client is not None:
prisma_client.spend_log_transactions.append(payload) prisma_client.spend_log_transactions.append(payload)
prisma_client.add_spend_log_transaction_to_daily_user_transaction( await self.add_spend_log_transaction_to_daily_user_transaction(
payload.copy() payload=payload.copy(),
prisma_client=prisma_client,
) )
return prisma_client return prisma_client
@ -435,7 +395,8 @@ class DBSpendUpdateWriter:
- Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB) - Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB)
""" """
await self.redis_update_buffer.store_in_memory_spend_updates_in_redis( await self.redis_update_buffer.store_in_memory_spend_updates_in_redis(
prisma_client=prisma_client, spend_update_queue=self.spend_update_queue,
daily_spend_update_queue=self.daily_spend_update_queue,
) )
# Only commit from redis to db if this pod is the leader # Only commit from redis to db if this pod is the leader
@ -447,12 +408,23 @@ class DBSpendUpdateWriter:
await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer() await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer()
) )
if db_spend_update_transactions is not None: if db_spend_update_transactions is not None:
await DBSpendUpdateWriter._commit_spend_updates_to_db( await self._commit_spend_updates_to_db(
prisma_client=prisma_client, prisma_client=prisma_client,
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions, db_spend_update_transactions=db_spend_update_transactions,
) )
daily_spend_update_transactions = (
await self.redis_update_buffer.get_all_daily_spend_update_transactions_from_redis_buffer()
)
if daily_spend_update_transactions is not None:
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
except Exception as e: except Exception as e:
verbose_proxy_logger.error(f"Error committing spend updates: {e}") verbose_proxy_logger.error(f"Error committing spend updates: {e}")
finally: finally:
@ -471,23 +443,34 @@ class DBSpendUpdateWriter:
Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS. Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS.
""" """
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions, # Aggregate all in memory spend updates (key, user, end_user, team, team_member, org) and commit to db
end_user_list_transactions=prisma_client.end_user_list_transactions, ################## Spend Update Transactions ##################
key_list_transactions=prisma_client.key_list_transactions, db_spend_update_transactions = (
team_list_transactions=prisma_client.team_list_transactions, await self.spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
team_member_list_transactions=prisma_client.team_member_list_transactions,
org_list_transactions=prisma_client.org_list_transactions,
) )
await DBSpendUpdateWriter._commit_spend_updates_to_db( await self._commit_spend_updates_to_db(
prisma_client=prisma_client, prisma_client=prisma_client,
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions, db_spend_update_transactions=db_spend_update_transactions,
) )
@staticmethod ################## Daily Spend Update Transactions ##################
# Aggregate all in memory daily spend transactions and commit to db
daily_spend_update_transactions = (
await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
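A generic illustration (not the litellm implementation) of the "only the leader pod commits" idea used in the Redis-buffered flow above: a shared lock key is claimed atomically, and only the claimant drains the buffer into the database. The dict below stands in for a Redis key with SET NX semantics; the key name and pod ids are placeholders.

```
import uuid

lock_store: dict = {}  # stand-in for a Redis key with SET NX semantics


def try_acquire_leader_lock(pod_id: str, key: str = "spend_commit_lock") -> bool:
    if key in lock_store:
        return lock_store[key] == pod_id  # already held (possibly by this pod)
    lock_store[key] = pod_id  # ~ SET key pod_id NX EX <ttl>
    return True


pod_a, pod_b = str(uuid.uuid4()), str(uuid.uuid4())
print(try_acquire_leader_lock(pod_a))  # True  -> pod A commits buffered spend to the DB
print(try_acquire_leader_lock(pod_b))  # False -> pod B skips this cycle
```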
async def _commit_spend_updates_to_db( # noqa: PLR0915 async def _commit_spend_updates_to_db( # noqa: PLR0915
self,
prisma_client: PrismaClient, prisma_client: PrismaClient,
n_retry_times: int, n_retry_times: int,
proxy_logging_obj: ProxyLogging, proxy_logging_obj: ProxyLogging,
@ -526,9 +509,6 @@ class DBSpendUpdateWriter:
where={"user_id": user_id}, where={"user_id": user_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.user_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -561,6 +541,7 @@ class DBSpendUpdateWriter:
n_retry_times=n_retry_times, n_retry_times=n_retry_times,
prisma_client=prisma_client, prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj, proxy_logging_obj=proxy_logging_obj,
end_user_list_transactions=end_user_list_transactions,
) )
### UPDATE KEY TABLE ### ### UPDATE KEY TABLE ###
key_list_transactions = db_spend_update_transactions["key_list_transactions"] key_list_transactions = db_spend_update_transactions["key_list_transactions"]
@ -583,9 +564,6 @@ class DBSpendUpdateWriter:
where={"token": token}, where={"token": token},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.key_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -632,9 +610,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id}, where={"team_id": team_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.team_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -684,9 +659,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id, "user_id": user_id}, where={"team_id": team_id, "user_id": user_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.team_member_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -725,9 +697,6 @@ class DBSpendUpdateWriter:
where={"organization_id": org_id}, where={"organization_id": org_id},
data={"spend": {"increment": response_cost}}, data={"spend": {"increment": response_cost}},
) )
prisma_client.org_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break break
except DB_CONNECTION_ERROR_TYPES as e: except DB_CONNECTION_ERROR_TYPES as e:
if ( if (
@ -744,3 +713,192 @@ class DBSpendUpdateWriter:
_raise_failed_update_spend_exception( _raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
daily_spend_transactions: Dict[str, DailyUserSpendTransaction],
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
from litellm.proxy.utils import _raise_failed_update_spend_exception
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(len(daily_spend_transactions))
)
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(daily_spend_transactions.items())[:BATCH_SIZE]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
"successful_requests": transaction[
"successful_requests"
],
"failed_requests": transaction[
"failed_requests"
],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
"successful_requests": {
"increment": transaction[
"successful_requests"
]
},
"failed_requests": {
"increment": transaction["failed_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
daily_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
daily_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
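A standalone sketch of the batch-and-retry pattern update_daily_user_spend uses: take up to BATCH_SIZE pending rows, attempt the commit, back off exponentially on transient connection errors, and only drop rows that were actually processed. commit_batch below is a placeholder for the Prisma batch upsert, and ConnectionError stands in for DB_CONNECTION_ERROR_TYPES.

```
import asyncio

BATCH_SIZE = 100


async def commit_batch(batch: dict) -> None:
    # placeholder for the prisma_client.db.batch_() upserts
    print(f"committing {len(batch)} rows")


async def flush_with_retries(pending: dict, n_retry_times: int = 3) -> None:
    for attempt in range(n_retry_times + 1):
        batch = dict(list(pending.items())[:BATCH_SIZE])
        if not batch:
            return  # nothing buffered
        try:
            await commit_batch(batch)
            for key in batch:
                pending.pop(key, None)  # remove only the rows that were processed
            return
        except ConnectionError:
            if attempt >= n_retry_times:
                raise
            await asyncio.sleep(2 ** attempt)  # exponential backoff


asyncio.run(flush_with_retries({"k1": {"spend": 0.1}, "k2": {"spend": 0.2}}))
# -> "committing 2 rows"
```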
async def add_spend_log_transaction_to_daily_user_transaction(
self,
payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient,
):
"""
Add a spend log transaction to the `daily_spend_update_queue`
Key = @@unique([user_id, date, api_key, model, custom_llm_provider])
If key exists, update the transaction with the new spend and usage
"""
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
request_status = prisma_client.get_request_status(payload)
verbose_proxy_logger.info(f"Logged request status: {request_status}")
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
daily_transaction = DailyUserSpendTransaction(
user_id=payload["user"],
date=date,
api_key=payload["api_key"],
model=payload["model"],
model_group=payload["model_group"],
custom_llm_provider=payload["custom_llm_provider"],
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
successful_requests=1 if request_status == "success" else 0,
failed_requests=1 if request_status != "success" else 0,
)
await self.daily_spend_update_queue.add_update(
update={daily_transaction_key: daily_transaction}
)
except Exception as e:
raise e
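A standalone sketch of how the daily_transaction_key is derived from a spend-log payload in the method above; the payload values are illustrative, not taken from a real request.

```
from datetime import datetime

payload = {
    "user": "user1",
    "startTime": datetime(2024, 1, 1, 12, 30),
    "api_key": "sk-hash-1",
    "model": "gpt-4o",
    "custom_llm_provider": "openai",
}

start = payload["startTime"]
# datetime -> isoformat -> date part; strings are split on "T" directly
date = start.isoformat().split("T")[0] if isinstance(start, datetime) else str(start).split("T")[0]
daily_transaction_key = (
    f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
)
print(daily_transaction_key)  # user1_2024-01-01_sk-hash-1_gpt-4o_openai
```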


@ -0,0 +1,25 @@
"""
Base class for in memory buffer for database transactions
"""
import asyncio
from litellm._logging import verbose_proxy_logger
class BaseUpdateQueue:
"""Base class for in memory buffer for database transactions"""
def __init__(self):
self.update_queue = asyncio.Queue()
async def add_update(self, update):
"""Enqueue an update."""
verbose_proxy_logger.debug("Adding update to queue: %s", update)
await self.update_queue.put(update)
async def flush_all_updates_from_in_memory_queue(self):
"""Get all updates from the queue."""
updates = []
while not self.update_queue.empty():
updates.append(await self.update_queue.get())
return updates
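A minimal, standalone sketch of the buffer pattern this base class introduces: producers enqueue updates without blocking the request path, and a periodic flush drains everything currently buffered in one pass. The class below is a simplified stand-in (no litellm logging), not the shipped implementation.

```
import asyncio


class InMemoryUpdateQueue:
    """Simplified stand-in for BaseUpdateQueue."""

    def __init__(self):
        self.update_queue: asyncio.Queue = asyncio.Queue()

    async def add_update(self, update):
        await self.update_queue.put(update)

    async def flush_all_updates_from_in_memory_queue(self):
        updates = []
        while not self.update_queue.empty():
            updates.append(await self.update_queue.get())
        return updates


async def main():
    queue = InMemoryUpdateQueue()
    await queue.add_update({"entity_type": "key", "entity_id": "sk-1", "response_cost": 0.002})
    await queue.add_update({"entity_type": "user", "entity_id": "u-1", "response_cost": 0.001})
    print(await queue.flush_all_updates_from_in_memory_queue())  # both updates, FIFO order


asyncio.run(main())
```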


@ -0,0 +1,95 @@
import asyncio
from typing import Dict, List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import DailyUserSpendTransaction
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class DailySpendUpdateQueue(BaseUpdateQueue):
"""
In memory buffer for daily spend updates that should be committed to the database
To add a new daily spend update transaction, use the following format:
daily_spend_update_queue.add_update({
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
}
})
Queue contains a list of daily spend update transactions
eg
queue = [
{
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
},
{
"user2_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
}
]
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[
Dict[str, DailyUserSpendTransaction]
] = asyncio.Queue()
async def flush_and_get_aggregated_daily_spend_update_transactions(
self,
) -> Dict[str, DailyUserSpendTransaction]:
"""Get all updates from the queue and return all updates aggregated by daily_transaction_key."""
updates = await self.flush_all_updates_from_in_memory_queue()
aggregated_daily_spend_update_transactions = (
DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
)
verbose_proxy_logger.debug(
"Aggregated daily spend update transactions: %s",
aggregated_daily_spend_update_transactions,
)
return aggregated_daily_spend_update_transactions
@staticmethod
def get_aggregated_daily_spend_update_transactions(
updates: List[Dict[str, DailyUserSpendTransaction]]
) -> Dict[str, DailyUserSpendTransaction]:
"""Aggregate updates by daily_transaction_key."""
aggregated_daily_spend_update_transactions: Dict[
str, DailyUserSpendTransaction
] = {}
for _update in updates:
for _key, payload in _update.items():
if _key in aggregated_daily_spend_update_transactions:
daily_transaction = aggregated_daily_spend_update_transactions[_key]
daily_transaction["spend"] += payload["spend"]
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
daily_transaction["completion_tokens"] += payload[
"completion_tokens"
]
daily_transaction["api_requests"] += payload["api_requests"]
daily_transaction["successful_requests"] += payload[
"successful_requests"
]
daily_transaction["failed_requests"] += payload["failed_requests"]
else:
aggregated_daily_spend_update_transactions[_key] = payload
return aggregated_daily_spend_update_transactions
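A standalone sketch of the aggregation performed by get_aggregated_daily_spend_update_transactions: queued updates that share a daily_transaction_key collapse into one row with summed counters. Plain dicts stand in for DailyUserSpendTransaction, and the key and counter values are illustrative.

```
from typing import Dict, List

COUNTERS = ("spend", "prompt_tokens", "completion_tokens",
            "api_requests", "successful_requests", "failed_requests")


def aggregate(updates: List[Dict[str, dict]]) -> Dict[str, dict]:
    out: Dict[str, dict] = {}
    for update in updates:
        for key, txn in update.items():
            if key in out:
                for field in COUNTERS:
                    out[key][field] += txn[field]  # same key -> sum the counters
            else:
                out[key] = dict(txn)  # first time this key is seen
    return out


base = {"spend": 0.5, "prompt_tokens": 100, "completion_tokens": 20,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0}
updates = [
    {"user1_2024-01-01_sk-hash_gpt-4o_openai": dict(base)},
    {"user1_2024-01-01_sk-hash_gpt-4o_openai": dict(base)},
]
print(aggregate(updates))
# -> one key, spend == 1.0, prompt_tokens == 200, api_requests == 2, ...
```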


@ -9,9 +9,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.caching import RedisCache from litellm.caching import RedisCache
from litellm.constants import MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_UPDATE_BUFFER_KEY from litellm.constants import (
MAX_REDIS_BUFFER_DEQUEUE_COUNT,
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
REDIS_UPDATE_BUFFER_KEY,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy._types import DBSpendUpdateTransactions from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
from litellm.secret_managers.main import str_to_bool from litellm.secret_managers.main import str_to_bool
if TYPE_CHECKING: if TYPE_CHECKING:
@ -54,11 +62,17 @@ class RedisUpdateBuffer:
async def store_in_memory_spend_updates_in_redis( async def store_in_memory_spend_updates_in_redis(
self, self,
prisma_client: PrismaClient, spend_update_queue: SpendUpdateQueue,
daily_spend_update_queue: DailySpendUpdateQueue,
): ):
""" """
Stores the in-memory spend updates to Redis Stores the in-memory spend updates to Redis
Stores the following in memory data structures in Redis:
- SpendUpdateQueue - Key, User, Team, TeamMember, Org, EndUser Spend updates
- DailySpendUpdateQueue - Daily Spend updates Aggregate view
For SpendUpdateQueue:
Each transaction is a dict stored as following: Each transaction is a dict stored as following:
- key is the entity id - key is the entity id
- value is the spend amount - value is the spend amount
@ -72,19 +86,46 @@ class RedisUpdateBuffer:
"0929880203": 0.001, "0929880203": 0.001,
] ]
``` ```
For DailySpendUpdateQueue:
Each transaction is a Dict[str, DailyUserSpendTransaction] stored as following:
- key is the daily_transaction_key
- value is the DailyUserSpendTransaction
```
Redis List:
daily_spend_update_transactions:
[
{
"user_keyhash_1_model_1": {
"spend": 1.2,
"prompt_tokens": 1000,
"completion_tokens": 1000,
"api_requests": 1000,
"successful_requests": 1000,
},
}
]
```
""" """
if self.redis_cache is None: if self.redis_cache is None:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
"redis_cache is None, skipping store_in_memory_spend_updates_in_redis" "redis_cache is None, skipping store_in_memory_spend_updates_in_redis"
) )
return return
db_spend_update_transactions: DBSpendUpdateTransactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions, db_spend_update_transactions = (
end_user_list_transactions=prisma_client.end_user_list_transactions, await spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
key_list_transactions=prisma_client.key_list_transactions, )
team_list_transactions=prisma_client.team_list_transactions, verbose_proxy_logger.debug(
team_member_list_transactions=prisma_client.team_member_list_transactions, "ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
org_list_transactions=prisma_client.org_list_transactions, )
daily_spend_update_transactions = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
verbose_proxy_logger.debug(
"ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions
) )
# only store in redis if there are any updates to commit # only store in redis if there are any updates to commit
@ -100,8 +141,13 @@ class RedisUpdateBuffer:
values=list_of_transactions, values=list_of_transactions,
) )
# clear the in-memory spend updates list_of_daily_spend_update_transactions = [
RedisUpdateBuffer._clear_all_in_memory_spend_updates(prisma_client) safe_dumps(daily_spend_update_transactions)
]
await self.redis_cache.async_rpush(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
values=list_of_daily_spend_update_transactions,
)
@staticmethod @staticmethod
def _number_of_transactions_to_store_in_redis( def _number_of_transactions_to_store_in_redis(
@ -116,20 +162,6 @@ class RedisUpdateBuffer:
num_transactions += len(v) num_transactions += len(v)
return num_transactions return num_transactions
@staticmethod
def _clear_all_in_memory_spend_updates(
prisma_client: PrismaClient,
):
"""
Clears all in-memory spend updates
"""
prisma_client.user_list_transactions = {}
prisma_client.end_user_list_transactions = {}
prisma_client.key_list_transactions = {}
prisma_client.team_list_transactions = {}
prisma_client.team_member_list_transactions = {}
prisma_client.org_list_transactions = {}
@staticmethod @staticmethod
def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]: def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]:
""" """
@ -197,6 +229,27 @@ class RedisUpdateBuffer:
return combined_transaction return combined_transaction
async def get_all_daily_spend_update_transactions_from_redis_buffer(
self,
) -> Optional[Dict[str, DailyUserSpendTransaction]]:
"""
Gets all the daily spend update transactions from Redis
"""
if self.redis_cache is None:
return None
list_of_transactions = await self.redis_cache.async_lpop(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
count=MAX_REDIS_BUFFER_DEQUEUE_COUNT,
)
if list_of_transactions is None:
return None
list_of_daily_spend_update_transactions = [
json.loads(transaction) for transaction in list_of_transactions
]
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
list_of_daily_spend_update_transactions
)
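A standalone sketch of the Redis round trip these buffer methods perform, with a plain Python list standing in for the Redis list (async_rpush appends serialized blobs, async_lpop pops them from the head) and json.dumps standing in for safe_dumps. Key names and values are illustrative.

```
import json

redis_list: list = []  # stand-in for the REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY list

# writer pod: serialize the aggregated daily transactions and push them
daily_txns = {
    "user1_2024-01-01_sk-hash_gpt-4o_openai": {
        "spend": 0.5, "prompt_tokens": 100, "completion_tokens": 20,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0,
    }
}
redis_list.append(json.dumps(daily_txns))  # ~ async_rpush(key=..., values=[safe_dumps(...)])

# leader pod: pop everything buffered, parse, and re-aggregate before the DB write
popped = [redis_list.pop(0) for _ in range(len(redis_list))]  # ~ async_lpop(key=..., count=N)
parsed = [json.loads(blob) for blob in popped]
print(parsed)
```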
@staticmethod @staticmethod
def _parse_list_of_transactions( def _parse_list_of_transactions(
list_of_transactions: Union[Any, List[Any]], list_of_transactions: Union[Any, List[Any]],


@ -0,0 +1,113 @@
import asyncio
from typing import List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import (
DBSpendUpdateTransactions,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class SpendUpdateQueue(BaseUpdateQueue):
"""
In memory buffer for spend updates that should be committed to the database
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[SpendUpdateQueueItem] = asyncio.Queue()
async def flush_and_get_aggregated_db_spend_update_transactions(
self,
) -> DBSpendUpdateTransactions:
"""Flush all updates from the queue and return all updates aggregated by entity type."""
updates = await self.flush_all_updates_from_in_memory_queue()
verbose_proxy_logger.debug("Aggregating updates by entity type: %s", updates)
return self.get_aggregated_db_spend_update_transactions(updates)
def get_aggregated_db_spend_update_transactions(
self, updates: List[SpendUpdateQueueItem]
) -> DBSpendUpdateTransactions:
"""Aggregate updates by entity type."""
# Initialize all transaction lists as empty dicts
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions={},
end_user_list_transactions={},
key_list_transactions={},
team_list_transactions={},
team_member_list_transactions={},
org_list_transactions={},
)
# Map entity types to their corresponding transaction dictionary keys
entity_type_to_dict_key = {
Litellm_EntityType.USER: "user_list_transactions",
Litellm_EntityType.END_USER: "end_user_list_transactions",
Litellm_EntityType.KEY: "key_list_transactions",
Litellm_EntityType.TEAM: "team_list_transactions",
Litellm_EntityType.TEAM_MEMBER: "team_member_list_transactions",
Litellm_EntityType.ORGANIZATION: "org_list_transactions",
}
for update in updates:
entity_type = update.get("entity_type")
entity_id = update.get("entity_id") or ""
response_cost = update.get("response_cost") or 0
if entity_type is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is None",
update,
)
continue
dict_key = entity_type_to_dict_key.get(entity_type)
if dict_key is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is not in entity_type_to_dict_key",
update,
)
continue # Skip unknown entity types
# Type-safe access using if/elif statements
if dict_key == "user_list_transactions":
transactions_dict = db_spend_update_transactions[
"user_list_transactions"
]
elif dict_key == "end_user_list_transactions":
transactions_dict = db_spend_update_transactions[
"end_user_list_transactions"
]
elif dict_key == "key_list_transactions":
transactions_dict = db_spend_update_transactions[
"key_list_transactions"
]
elif dict_key == "team_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_list_transactions"
]
elif dict_key == "team_member_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_member_list_transactions"
]
elif dict_key == "org_list_transactions":
transactions_dict = db_spend_update_transactions[
"org_list_transactions"
]
else:
continue
if transactions_dict is None:
transactions_dict = {}
# type ignore is safe: dict_key is guaranteed to be one of "user_list_transactions", "end_user_list_transactions", "key_list_transactions", "team_list_transactions", "team_member_list_transactions", "org_list_transactions"
db_spend_update_transactions[dict_key] = transactions_dict # type: ignore
if entity_id not in transactions_dict:
transactions_dict[entity_id] = 0
transactions_dict[entity_id] += response_cost or 0
return db_spend_update_transactions
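A standalone sketch of the per-entity aggregation above: queue items are bucketed by entity type and costs for the same entity_id are summed. The lower-case strings stand in for the Litellm_EntityType enum values, and plain dicts stand in for SpendUpdateQueueItem / DBSpendUpdateTransactions.

```
from typing import Dict, List

ENTITY_TO_BUCKET = {
    "user": "user_list_transactions",
    "end_user": "end_user_list_transactions",
    "key": "key_list_transactions",
    "team": "team_list_transactions",
    "team_member": "team_member_list_transactions",
    "organization": "org_list_transactions",
}


def aggregate(updates: List[dict]) -> Dict[str, Dict[str, float]]:
    # one {entity_id: summed_cost} dict per bucket
    buckets: Dict[str, Dict[str, float]] = {name: {} for name in ENTITY_TO_BUCKET.values()}
    for update in updates:
        bucket_name = ENTITY_TO_BUCKET.get(update.get("entity_type"))
        if bucket_name is None:
            continue  # unknown entity types are skipped, mirroring the code above
        entity_id = update.get("entity_id") or ""
        cost = update.get("response_cost") or 0.0
        buckets[bucket_name][entity_id] = buckets[bucket_name].get(entity_id, 0.0) + cost
    return buckets


print(aggregate([
    {"entity_type": "key", "entity_id": "sk-hash-1", "response_cost": 0.002},
    {"entity_type": "key", "entity_id": "sk-hash-1", "response_cost": 0.003},
    {"entity_type": "team", "entity_id": "team-a", "response_cost": 0.001},
]))
# costs for "sk-hash-1" are summed under key_list_transactions; "team-a" keeps its single entry
```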


@ -0,0 +1,15 @@
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
general_settings:
use_redis_transaction_buffer: true
litellm_settings:
cache: True
cache_params:
type: redis
supported_call_types: []


@ -14,6 +14,7 @@ from pydantic import BaseModel
from websockets.asyncio.client import ClientConnection, connect from websockets.asyncio.client import ClientConnection, connect
from litellm import DualCache from litellm import DualCache
from litellm._version import version as litellm_version
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.custom_httpx.http_handler import ( from litellm.llms.custom_httpx.http_handler import (
@ -75,7 +76,9 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]: ) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Pre-Call Hook") verbose_proxy_logger.debug("Inside AIM Pre-Call Hook")
await self.call_aim_guardrail(data, hook="pre_call") await self.call_aim_guardrail(
data, hook="pre_call", key_alias=user_api_key_dict.key_alias
)
return data return data
async def async_moderation_hook( async def async_moderation_hook(
@ -93,15 +96,18 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]: ) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Moderation Hook") verbose_proxy_logger.debug("Inside AIM Moderation Hook")
await self.call_aim_guardrail(data, hook="moderation") await self.call_aim_guardrail(
data, hook="moderation", key_alias=user_api_key_dict.key_alias
)
return data return data
async def call_aim_guardrail(self, data: dict, hook: str) -> None: async def call_aim_guardrail(
self, data: dict, hook: str, key_alias: Optional[str]
) -> None:
user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
headers = { headers = self._build_aim_headers(
"Authorization": f"Bearer {self.api_key}", hook=hook, key_alias=key_alias, user_email=user_email
"x-aim-litellm-hook": hook, )
} | ({"x-aim-user-email": user_email} if user_email else {})
response = await self.async_handler.post( response = await self.async_handler.post(
f"{self.api_base}/detect/openai", f"{self.api_base}/detect/openai",
headers=headers, headers=headers,
@ -120,18 +126,16 @@ class AimGuardrail(CustomGuardrail):
raise HTTPException(status_code=400, detail=res["detection_message"]) raise HTTPException(status_code=400, detail=res["detection_message"])
async def call_aim_guardrail_on_output( async def call_aim_guardrail_on_output(
self, request_data: dict, output: str, hook: str self, request_data: dict, output: str, hook: str, key_alias: Optional[str]
) -> Optional[str]: ) -> Optional[str]:
user_email = ( user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
) )
headers = {
"Authorization": f"Bearer {self.api_key}",
"x-aim-litellm-hook": hook,
} | ({"x-aim-user-email": user_email} if user_email else {})
response = await self.async_handler.post( response = await self.async_handler.post(
f"{self.api_base}/detect/output", f"{self.api_base}/detect/output",
headers=headers, headers=self._build_aim_headers(
hook=hook, key_alias=key_alias, user_email=user_email
),
json={"output": output, "messages": request_data.get("messages", [])}, json={"output": output, "messages": request_data.get("messages", [])},
) )
response.raise_for_status() response.raise_for_status()
@ -147,6 +151,32 @@ class AimGuardrail(CustomGuardrail):
return res["detection_message"] return res["detection_message"]
return None return None
def _build_aim_headers(
self, *, hook: str, key_alias: Optional[str], user_email: Optional[str]
):
"""
A helper function to build the http headers that are required by AIM guardrails.
"""
return (
{
"Authorization": f"Bearer {self.api_key}",
# Used by Aim to apply only the guardrails that should be applied in a specific request phase.
"x-aim-litellm-hook": hook,
# Used by Aim to track LiteLLM version and provide backward compatibility.
"x-aim-litellm-version": litellm_version,
}
# Used by Aim to track guardrails violations by user.
| ({"x-aim-user-email": user_email} if user_email else {})
| (
{
# Used by Aim to apply only the guardrails that are associated with the key alias.
"x-aim-litellm-key-alias": key_alias,
}
if key_alias
else {}
)
)
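A standalone sketch of the header set _build_aim_headers produces. build_aim_headers below mirrors the method as a free function; the API key, version string, and key alias are placeholders, and the optional entries are added only when they are set.

```
from typing import Optional


def build_aim_headers(api_key: str, litellm_version: str, hook: str,
                      key_alias: Optional[str], user_email: Optional[str]) -> dict:
    return (
        {
            "Authorization": f"Bearer {api_key}",
            "x-aim-litellm-hook": hook,
            "x-aim-litellm-version": litellm_version,
        }
        | ({"x-aim-user-email": user_email} if user_email else {})
        | ({"x-aim-litellm-key-alias": key_alias} if key_alias else {})
    )


print(build_aim_headers("aim-key", "1.65.0", "pre_call", key_alias="prod-key", user_email=None))
# {'Authorization': 'Bearer aim-key', 'x-aim-litellm-hook': 'pre_call',
#  'x-aim-litellm-version': '1.65.0', 'x-aim-litellm-key-alias': 'prod-key'}
```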
async def async_post_call_success_hook( async def async_post_call_success_hook(
self, self,
data: dict, data: dict,
@ -160,7 +190,7 @@ class AimGuardrail(CustomGuardrail):
): ):
content = response.choices[0].message.content or "" content = response.choices[0].message.content or ""
detection = await self.call_aim_guardrail_on_output( detection = await self.call_aim_guardrail_on_output(
data, content, hook="output" data, content, hook="output", key_alias=user_api_key_dict.key_alias
) )
if detection: if detection:
raise HTTPException(status_code=400, detail=detection) raise HTTPException(status_code=400, detail=detection)
@ -174,11 +204,13 @@ class AimGuardrail(CustomGuardrail):
user_email = ( user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email") request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
) )
headers = {
"Authorization": f"Bearer {self.api_key}",
} | ({"x-aim-user-email": user_email} if user_email else {})
async with connect( async with connect(
f"{self.ws_api_base}/detect/output/ws", additional_headers=headers f"{self.ws_api_base}/detect/output/ws",
additional_headers=self._build_aim_headers(
hook="output",
key_alias=user_api_key_dict.key_alias,
user_email=user_email,
),
) as websocket: ) as websocket:
sender = asyncio.create_task( sender = asyncio.create_task(
self.forward_the_stream_to_aim(websocket, response) self.forward_the_stream_to_aim(websocket, response)


@ -13,7 +13,6 @@ from litellm.litellm_core_utils.core_helpers import (
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.auth_checks import log_db_metrics from litellm.proxy.auth.auth_checks import log_db_metrics
from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
from litellm.proxy.utils import ProxyUpdateSpend from litellm.proxy.utils import ProxyUpdateSpend
from litellm.types.utils import ( from litellm.types.utils import (
StandardLoggingPayload, StandardLoggingPayload,
@ -37,6 +36,8 @@ class _ProxyDBLogger(CustomLogger):
if _ProxyDBLogger._should_track_errors_in_db() is False: if _ProxyDBLogger._should_track_errors_in_db() is False:
return return
from litellm.proxy.proxy_server import proxy_logging_obj
_metadata = dict( _metadata = dict(
StandardLoggingUserAPIKeyMetadata( StandardLoggingUserAPIKeyMetadata(
user_api_key_hash=user_api_key_dict.api_key, user_api_key_hash=user_api_key_dict.api_key,
@ -66,7 +67,7 @@ class _ProxyDBLogger(CustomLogger):
request_data.get("proxy_server_request") or {} request_data.get("proxy_server_request") or {}
) )
request_data["litellm_params"]["metadata"] = existing_metadata request_data["litellm_params"]["metadata"] = existing_metadata
await DBSpendUpdateWriter.update_database( await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key_dict.api_key, token=user_api_key_dict.api_key,
response_cost=0.0, response_cost=0.0,
user_id=user_api_key_dict.user_id, user_id=user_api_key_dict.user_id,
@ -136,7 +137,7 @@ class _ProxyDBLogger(CustomLogger):
end_user_id=end_user_id, end_user_id=end_user_id,
): ):
## UPDATE DATABASE ## UPDATE DATABASE
await DBSpendUpdateWriter.update_database( await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key, token=user_api_key,
response_cost=response_cost, response_cost=response_cost,
user_id=user_id, user_id=user_id,


@ -747,7 +747,10 @@ def _get_enforced_params(
enforced_params: Optional[list] = None enforced_params: Optional[list] = None
if general_settings is not None: if general_settings is not None:
enforced_params = general_settings.get("enforced_params") enforced_params = general_settings.get("enforced_params")
if "service_account_settings" in general_settings: if (
"service_account_settings" in general_settings
and check_if_token_is_service_account(user_api_key_dict) is True
):
service_account_settings = general_settings["service_account_settings"] service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings: if "enforced_params" in service_account_settings:
if enforced_params is None: if enforced_params is None:
@ -760,6 +763,20 @@ def _get_enforced_params(
return enforced_params return enforced_params
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
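A minimal illustration of the service-account check: a key whose metadata carries a service_account_id is treated as a service account. The small Token class below is a stand-in for UserAPIKeyAuth.

```
from typing import Optional


class Token:
    def __init__(self, metadata: Optional[dict] = None):
        self.metadata = metadata or {}


def check_if_token_is_service_account(valid_token: Token) -> bool:
    return bool(valid_token.metadata) and "service_account_id" in valid_token.metadata


print(check_if_token_is_service_account(Token({"service_account_id": "svc-123"})))  # True
print(check_if_token_is_service_account(Token({})))                                 # False
```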
def _enforced_params_check( def _enforced_params_check(
request_body: dict, request_body: dict,
general_settings: Optional[dict], general_settings: Optional[dict],


@ -1259,19 +1259,43 @@ class SpendMetrics(BaseModel):
prompt_tokens: int = Field(default=0) prompt_tokens: int = Field(default=0)
completion_tokens: int = Field(default=0) completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0) total_tokens: int = Field(default=0)
successful_requests: int = Field(default=0)
failed_requests: int = Field(default=0)
api_requests: int = Field(default=0) api_requests: int = Field(default=0)
class MetricBase(BaseModel):
metrics: SpendMetrics
class MetricWithMetadata(MetricBase):
metadata: Dict[str, Any] = Field(default_factory=dict)
class KeyMetadata(BaseModel):
"""Metadata for a key"""
key_alias: Optional[str] = None
class KeyMetricWithMetadata(MetricBase):
"""Base class for metrics with additional metadata"""
metadata: KeyMetadata = Field(default_factory=KeyMetadata)
class BreakdownMetrics(BaseModel): class BreakdownMetrics(BaseModel):
"""Breakdown of spend by different dimensions""" """Breakdown of spend by different dimensions"""
models: Dict[str, SpendMetrics] = Field(default_factory=dict) # model -> metrics models: Dict[str, MetricWithMetadata] = Field(
providers: Dict[str, SpendMetrics] = Field(
default_factory=dict default_factory=dict
) # provider -> metrics ) # model -> {metrics, metadata}
api_keys: Dict[str, SpendMetrics] = Field( providers: Dict[str, MetricWithMetadata] = Field(
default_factory=dict default_factory=dict
) # api_key -> metrics ) # provider -> {metrics, metadata}
api_keys: Dict[str, KeyMetricWithMetadata] = Field(
default_factory=dict
) # api_key -> {metrics, metadata}
class DailySpendData(BaseModel): class DailySpendData(BaseModel):
@ -1284,7 +1308,10 @@ class DailySpendMetadata(BaseModel):
total_spend: float = Field(default=0.0) total_spend: float = Field(default=0.0)
total_prompt_tokens: int = Field(default=0) total_prompt_tokens: int = Field(default=0)
total_completion_tokens: int = Field(default=0) total_completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0)
total_api_requests: int = Field(default=0) total_api_requests: int = Field(default=0)
total_successful_requests: int = Field(default=0)
total_failed_requests: int = Field(default=0)
page: int = Field(default=1) page: int = Field(default=1)
total_pages: int = Field(default=1) total_pages: int = Field(default=1)
has_more: bool = Field(default=False) has_more: bool = Field(default=False)
@ -1307,6 +1334,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
completion_tokens: int = 0 completion_tokens: int = 0
spend: float = 0.0 spend: float = 0.0
api_requests: int = 0 api_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
class GroupedData(TypedDict): class GroupedData(TypedDict):
@ -1322,34 +1351,57 @@ def update_metrics(
group_metrics.completion_tokens += record.completion_tokens group_metrics.completion_tokens += record.completion_tokens
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
group_metrics.api_requests += record.api_requests group_metrics.api_requests += record.api_requests
group_metrics.successful_requests += record.successful_requests
group_metrics.failed_requests += record.failed_requests
return group_metrics return group_metrics
def update_breakdown_metrics( def update_breakdown_metrics(
breakdown: BreakdownMetrics, record: LiteLLM_DailyUserSpend breakdown: BreakdownMetrics,
record: LiteLLM_DailyUserSpend,
model_metadata: Dict[str, Dict[str, Any]],
provider_metadata: Dict[str, Dict[str, Any]],
api_key_metadata: Dict[str, Dict[str, Any]],
) -> BreakdownMetrics: ) -> BreakdownMetrics:
"""Updates breakdown metrics for a single record using the existing update_metrics function""" """Updates breakdown metrics for a single record using the existing update_metrics function"""
# Update model breakdown # Update model breakdown
if record.model not in breakdown.models: if record.model not in breakdown.models:
breakdown.models[record.model] = SpendMetrics() breakdown.models[record.model] = MetricWithMetadata(
breakdown.models[record.model] = update_metrics( metrics=SpendMetrics(),
breakdown.models[record.model], record metadata=model_metadata.get(
record.model, {}
), # Add any model-specific metadata here
)
breakdown.models[record.model].metrics = update_metrics(
breakdown.models[record.model].metrics, record
) )
# Update provider breakdown # Update provider breakdown
provider = record.custom_llm_provider or "unknown" provider = record.custom_llm_provider or "unknown"
if provider not in breakdown.providers: if provider not in breakdown.providers:
breakdown.providers[provider] = SpendMetrics() breakdown.providers[provider] = MetricWithMetadata(
breakdown.providers[provider] = update_metrics( metrics=SpendMetrics(),
breakdown.providers[provider], record metadata=provider_metadata.get(
provider, {}
), # Add any provider-specific metadata here
)
breakdown.providers[provider].metrics = update_metrics(
breakdown.providers[provider].metrics, record
) )
# Update api key breakdown # Update api key breakdown
if record.api_key not in breakdown.api_keys: if record.api_key not in breakdown.api_keys:
breakdown.api_keys[record.api_key] = SpendMetrics() breakdown.api_keys[record.api_key] = KeyMetricWithMetadata(
breakdown.api_keys[record.api_key] = update_metrics( metrics=SpendMetrics(),
breakdown.api_keys[record.api_key], record metadata=KeyMetadata(
key_alias=api_key_metadata.get(record.api_key, {}).get(
"key_alias", None
)
), # Add any api_key-specific metadata here
)
breakdown.api_keys[record.api_key].metrics = update_metrics(
breakdown.api_keys[record.api_key].metrics, record
) )
return breakdown return breakdown
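A standalone sketch (illustrative values only) of the breakdown shape after this change: each model/provider/api_key bucket now nests its counters under "metrics" and carries a "metadata" object, with key_alias resolved for API keys. The hash and alias below are placeholders.

```
breakdown = {
    "models": {
        "gpt-4o": {
            "metrics": {"spend": 1.25, "prompt_tokens": 1000, "completion_tokens": 200,
                        "total_tokens": 1200, "api_requests": 10,
                        "successful_requests": 9, "failed_requests": 1},
            "metadata": {},
        },
    },
    "api_keys": {
        "88dc28..hash": {
            "metrics": {"spend": 1.25, "prompt_tokens": 1000, "completion_tokens": 200,
                        "total_tokens": 1200, "api_requests": 10,
                        "successful_requests": 9, "failed_requests": 1},
            "metadata": {"key_alias": "my-prod-key"},
        },
    },
}
print(breakdown["api_keys"]["88dc28..hash"]["metadata"]["key_alias"])  # my-prod-key
```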
@ -1428,6 +1480,14 @@ async def get_user_daily_activity(
if api_key: if api_key:
where_conditions["api_key"] = api_key where_conditions["api_key"] = api_key
if (
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
):
where_conditions[
"user_id"
] = user_api_key_dict.user_id # only allow access to own data
# Get total count for pagination # Get total count for pagination
total_count = await prisma_client.db.litellm_dailyuserspend.count( total_count = await prisma_client.db.litellm_dailyuserspend.count(
where=where_conditions where=where_conditions
@ -1443,6 +1503,28 @@ async def get_user_daily_activity(
take=page_size, take=page_size,
) )
daily_spend_data_pydantic_list = [
LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
]
# Get all unique API keys from the spend data
api_keys = set()
for record in daily_spend_data_pydantic_list:
if record.api_key:
api_keys.add(record.api_key)
# Fetch key aliases in bulk
api_key_metadata: Dict[str, Dict[str, Any]] = {}
model_metadata: Dict[str, Dict[str, Any]] = {}
provider_metadata: Dict[str, Dict[str, Any]] = {}
if api_keys:
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
where={"token": {"in": list(api_keys)}}
)
api_key_metadata.update(
{k.token: {"key_alias": k.key_alias} for k in key_records}
)
# Process results # Process results
results = [] results = []
total_metrics = SpendMetrics() total_metrics = SpendMetrics()
@ -1450,7 +1532,7 @@ async def get_user_daily_activity(
# Group data by date and other dimensions # Group data by date and other dimensions
grouped_data: Dict[str, Dict[str, Any]] = {} grouped_data: Dict[str, Dict[str, Any]] = {}
for record in daily_spend_data: for record in daily_spend_data_pydantic_list:
date_str = record.date date_str = record.date
if date_str not in grouped_data: if date_str not in grouped_data:
grouped_data[date_str] = { grouped_data[date_str] = {
@ -1464,7 +1546,11 @@ async def get_user_daily_activity(
) )
# Update breakdowns # Update breakdowns
grouped_data[date_str]["breakdown"] = update_breakdown_metrics( grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
grouped_data[date_str]["breakdown"], record grouped_data[date_str]["breakdown"],
record,
model_metadata,
provider_metadata,
api_key_metadata,
) )
# Update total metrics # Update total metrics
@ -1474,7 +1560,9 @@ async def get_user_daily_activity(
total_metrics.total_tokens += ( total_metrics.total_tokens += (
record.prompt_tokens + record.completion_tokens record.prompt_tokens + record.completion_tokens
) )
total_metrics.api_requests += 1 total_metrics.api_requests += record.api_requests
total_metrics.successful_requests += record.successful_requests
total_metrics.failed_requests += record.failed_requests
# Convert grouped data to response format # Convert grouped data to response format
for date_str, data in grouped_data.items(): for date_str, data in grouped_data.items():
@ -1495,7 +1583,10 @@ async def get_user_daily_activity(
total_spend=total_metrics.spend, total_spend=total_metrics.spend,
total_prompt_tokens=total_metrics.prompt_tokens, total_prompt_tokens=total_metrics.prompt_tokens,
total_completion_tokens=total_metrics.completion_tokens, total_completion_tokens=total_metrics.completion_tokens,
total_tokens=total_metrics.total_tokens,
total_api_requests=total_metrics.api_requests, total_api_requests=total_metrics.api_requests,
total_successful_requests=total_metrics.successful_requests,
total_failed_requests=total_metrics.failed_requests,
page=page, page=page,
total_pages=-(-total_count // page_size), # Ceiling division total_pages=-(-total_count // page_size), # Ceiling division
has_more=(page * page_size) < total_count, has_more=(page * page_size) < total_count,


@ -394,7 +394,7 @@ class ModelManagementAuthChecks:
@staticmethod @staticmethod
async def can_user_make_model_call( async def can_user_make_model_call(
model_params: Union[Deployment, updateDeployment], model_params: Deployment,
user_api_key_dict: UserAPIKeyAuth, user_api_key_dict: UserAPIKeyAuth,
prisma_client: PrismaClient, prisma_client: PrismaClient,
premium_user: bool, premium_user: bool,
@ -723,15 +723,6 @@ async def update_model(
}, },
) )
await ModelManagementAuthChecks.can_user_make_model_call(
model_params=model_params,
user_api_key_dict=user_api_key_dict,
prisma_client=prisma_client,
premium_user=premium_user,
)
# update DB
if store_model_in_db is True:
_model_id = None _model_id = None
_model_info = getattr(model_params, "model_info", None) _model_info = getattr(model_params, "model_info", None)
if _model_info is None: if _model_info is None:
@ -740,11 +731,13 @@ async def update_model(
_model_id = _model_info.id _model_id = _model_info.id
if _model_id is None: if _model_id is None:
raise Exception("model_info.id not provided") raise Exception("model_info.id not provided")
_existing_litellm_params = ( _existing_litellm_params = (
await prisma_client.db.litellm_proxymodeltable.find_unique( await prisma_client.db.litellm_proxymodeltable.find_unique(
where={"model_id": _model_id} where={"model_id": _model_id}
) )
) )
if _existing_litellm_params is None: if _existing_litellm_params is None:
if ( if (
llm_router is not None llm_router is not None
@ -756,7 +749,19 @@ async def update_model(
"error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit." "error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
}, },
) )
else:
raise Exception("model not found") raise Exception("model not found")
deployment = Deployment(**_existing_litellm_params.model_dump())
await ModelManagementAuthChecks.can_user_make_model_call(
model_params=deployment,
user_api_key_dict=user_api_key_dict,
prisma_client=prisma_client,
premium_user=premium_user,
)
# update DB
if store_model_in_db is True:
_existing_litellm_params_dict = dict( _existing_litellm_params_dict = dict(
_existing_litellm_params.litellm_params _existing_litellm_params.litellm_params
) )


@ -1,15 +1,6 @@
model_list: model_list:
- model_name: gpt-4o - model_name: fake-openai-endpoint
litellm_params: litellm_params:
model: openai/gpt-4o model: openai/fake
api_key: sk-xxxxxxx api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}


@ -3308,15 +3308,6 @@ async def model_list(
tags=["chat/completions"], tags=["chat/completions"],
responses={200: {"description": "Successful response"}, **ERROR_RESPONSES}, responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
) # azure compatible endpoint ) # azure compatible endpoint
@backoff.on_exception(
backoff.expo,
Exception, # base exception to catch for the backoff
max_tries=global_max_parallel_request_retries, # maximum number of retries
max_time=global_max_parallel_request_retry_timeout, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff
giveup=giveup,
logger=verbose_proxy_logger,
)
async def chat_completion( # noqa: PLR0915 async def chat_completion( # noqa: PLR0915
request: Request, request: Request,
fastapi_response: Response, fastapi_response: Response,


@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0) completion_tokens Int @default(0)
spend Float @default(0.0) spend Float @default(0.0)
api_requests Int @default(0) api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now()) created_at DateTime @default(now())
updated_at DateTime @updatedAt updated_at DateTime @updatedAt
@ -352,4 +354,3 @@ enum JobStatus {
INACTIVE INACTIVE
} }


@ -10,14 +10,24 @@ import traceback
from datetime import datetime, timedelta from datetime import datetime, timedelta
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, overload from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Literal,
Optional,
Union,
cast,
overload,
)
from litellm.proxy._types import ( from litellm.proxy._types import (
DB_CONNECTION_ERROR_TYPES, DB_CONNECTION_ERROR_TYPES,
CommonProxyErrors, CommonProxyErrors,
DailyUserSpendTransaction,
ProxyErrorTypes, ProxyErrorTypes,
ProxyException, ProxyException,
SpendLogsMetadata,
SpendLogsPayload, SpendLogsPayload,
) )
from litellm.types.guardrails import GuardrailEventHooks from litellm.types.guardrails import GuardrailEventHooks
@ -1100,14 +1110,7 @@ def jsonify_object(data: dict) -> dict:
class PrismaClient: class PrismaClient:
user_list_transactions: dict = {}
end_user_list_transactions: dict = {}
key_list_transactions: dict = {}
team_list_transactions: dict = {}
team_member_list_transactions: dict = {} # key is ["team_id" + "user_id"]
org_list_transactions: dict = {}
spend_log_transactions: List = [] spend_log_transactions: List = []
daily_user_spend_transactions: Dict[str, DailyUserSpendTransaction] = {}
def __init__( def __init__(
self, self,
@ -1145,62 +1148,40 @@ class PrismaClient:
) # Client to connect to Prisma db ) # Client to connect to Prisma db
verbose_proxy_logger.debug("Success - Created Prisma Client") verbose_proxy_logger.debug("Success - Created Prisma Client")
def add_spend_log_transaction_to_daily_user_transaction( def get_request_status(
self, payload: Union[dict, SpendLogsPayload] self, payload: Union[dict, SpendLogsPayload]
): ) -> Literal["success", "failure"]:
""" """
Add a spend log transaction to the daily user transaction list Determine if a request was successful or failed based on payload metadata.
Key = @@unique([user_id, date, api_key, model, custom_llm_provider]) ) Args:
payload (Union[dict, SpendLogsPayload]): Request payload containing metadata
If key exists, update the transaction with the new spend and usage Returns:
Literal["success", "failure"]: Request status
""" """
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try: try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" # Get metadata and convert to dict if it's a JSON string
if daily_transaction_key in self.daily_user_spend_transactions: payload_metadata: Union[Dict, SpendLogsMetadata, str] = payload.get(
daily_transaction = self.daily_user_spend_transactions[ "metadata", {}
daily_transaction_key )
] if isinstance(payload_metadata, str):
daily_transaction["spend"] += payload["spend"] payload_metadata_json: Union[Dict, SpendLogsMetadata] = cast(
daily_transaction["prompt_tokens"] += payload["prompt_tokens"] Dict, json.loads(payload_metadata)
daily_transaction["completion_tokens"] += payload["completion_tokens"] )
daily_transaction["api_requests"] += 1
else: else:
daily_transaction = DailyUserSpendTransaction( payload_metadata_json = payload_metadata
user_id=payload["user"],
date=date, # Check status in metadata dict
api_key=payload["api_key"], return (
model=payload["model"], "failure"
model_group=payload["model_group"], if payload_metadata_json.get("status") == "failure"
custom_llm_provider=payload["custom_llm_provider"], else "success"
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
) )
self.daily_user_spend_transactions[ except (json.JSONDecodeError, AttributeError):
daily_transaction_key # Default to success if metadata parsing fails
] = daily_transaction return "success"
except Exception as e:
raise e
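A standalone sketch of the get_request_status logic above: the spend-log payload's "metadata" may arrive as a dict or a JSON string, and anything that is not explicitly status == "failure" (including parse errors) is counted as success.

```
import json
from typing import Union


def get_request_status(payload: dict) -> str:
    try:
        metadata: Union[dict, str] = payload.get("metadata", {})
        if isinstance(metadata, str):
            metadata = json.loads(metadata)  # stringified metadata is parsed first
        return "failure" if metadata.get("status") == "failure" else "success"
    except (json.JSONDecodeError, AttributeError):
        return "success"  # default to success if metadata can't be parsed


print(get_request_status({"metadata": '{"status": "failure"}'}))  # failure
print(get_request_status({"metadata": {"status": "success"}}))    # success
print(get_request_status({"metadata": "not json"}))               # success (parse error defaults)
```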
def hash_token(self, token: str): def hash_token(self, token: str):
# Hash the string using SHA-256 # Hash the string using SHA-256
@ -2422,7 +2403,10 @@ def _hash_token_if_needed(token: str) -> str:
class ProxyUpdateSpend: class ProxyUpdateSpend:
@staticmethod @staticmethod
async def update_end_user_spend( async def update_end_user_spend(
n_retry_times: int, prisma_client: PrismaClient, proxy_logging_obj: ProxyLogging n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
end_user_list_transactions: Dict[str, float],
): ):
for i in range(n_retry_times + 1): for i in range(n_retry_times + 1):
start_time = time.time() start_time = time.time()
@ -2434,7 +2418,7 @@ class ProxyUpdateSpend:
for ( for (
end_user_id, end_user_id,
response_cost, response_cost,
) in prisma_client.end_user_list_transactions.items(): ) in end_user_list_transactions.items():
if litellm.max_end_user_budget is not None: if litellm.max_end_user_budget is not None:
pass pass
batcher.litellm_endusertable.upsert( batcher.litellm_endusertable.upsert(
@ -2461,10 +2445,6 @@ class ProxyUpdateSpend:
_raise_failed_update_spend_exception( _raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
finally:
prisma_client.end_user_list_transactions = (
{}
) # reset the end user list transactions - prevent bad data from causing issues
@staticmethod @staticmethod
async def update_spend_logs( async def update_spend_logs(
@ -2538,120 +2518,6 @@ class ProxyUpdateSpend:
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
) )
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(prisma_client.daily_user_spend_transactions.items())[
:BATCH_SIZE
]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
prisma_client.daily_user_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
prisma_client.daily_user_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
@staticmethod
def disable_spend_updates() -> bool:
"""
@@ -2701,20 +2567,6 @@ async def update_spend( # noqa: PLR0915
db_writer_client=db_writer_client,
)
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(
len(prisma_client.daily_user_spend_transactions)
)
)
if len(prisma_client.daily_user_spend_transactions) > 0:
await ProxyUpdateSpend.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
)
def _raise_failed_update_spend_exception(
e: Exception, start_time: float, proxy_logging_obj: ProxyLogging


@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
from typing_extensions import TypeAlias
class AnthropicResponseTextBlock(TypedDict, total=False):
"""
Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
"""
citations: Optional[List[Dict[str, Any]]]
text: str
type: Literal["text"]
class AnthropicResponseToolUseBlock(TypedDict, total=False):
"""
Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
"""
id: Optional[str]
input: Optional[str]
name: Optional[str]
type: Literal["tool_use"]
class AnthropicResponseThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
"""
signature: Optional[str]
thinking: Optional[str]
type: Literal["thinking"]
class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
"""
data: Optional[str]
type: Literal["redacted_thinking"]
AnthropicResponseContentBlock: TypeAlias = Union[
AnthropicResponseTextBlock,
AnthropicResponseToolUseBlock,
AnthropicResponseThinkingBlock,
AnthropicResponseRedactedThinkingBlock,
]
class AnthropicUsage(TypedDict, total=False):
"""
Input and output tokens used in the request
"""
input_tokens: int
output_tokens: int
"""
Cache Tokens Used
"""
cache_creation_input_tokens: int
cache_read_input_tokens: int
class AnthropicMessagesResponse(TypedDict, total=False):
"""
Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
"""
content: Optional[List[AnthropicResponseContentBlock]]
id: str
model: Optional[str] # This represents the Model type from Anthropic
role: Optional[Literal["assistant"]]
stop_reason: Optional[
Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
]
stop_sequence: Optional[str]
type: Optional[Literal["message"]]
usage: Optional[AnthropicUsage]
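For reference, a minimal sketch of how these TypedDicts compose at runtime; since they use total=False every key is optional, and the ids, model name, and values below are placeholders, not output from the API:

# Illustrative only -- a plain dict that satisfies AnthropicMessagesResponse.
example_response = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet-20250219",
    "content": [
        {"type": "thinking", "thinking": "step-by-step reasoning", "signature": "sig"},
        {"type": "text", "text": "Hello!", "citations": None},
    ],
    "stop_reason": "end_turn",
    "usage": {"input_tokens": 10, "output_tokens": 5},
}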


@@ -1113,3 +1113,6 @@ ResponsesAPIStreamingResponse = Annotated[
],
Discriminator("type"),
]
REASONING_EFFORT = Literal["low", "medium", "high"]


@@ -0,0 +1,9 @@
import json
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
class OpenRouterErrorMessage(TypedDict):
message: str
code: int
metadata: Dict


@@ -5901,9 +5901,10 @@ class ModelResponseIterator:
class ModelResponseListIterator:
def __init__(self, model_responses):
def __init__(self, model_responses, delay: Optional[float] = None):
self.model_responses = model_responses
self.index = 0
self.delay = delay
# Sync iterator
def __iter__(self):
@@ -5914,6 +5915,8 @@ class ModelResponseListIterator:
raise StopIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
time.sleep(self.delay)
return model_response
# Async iterator
@@ -5925,6 +5928,8 @@ class ModelResponseListIterator:
raise StopAsyncIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
await asyncio.sleep(self.delay)
return model_response
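The new delay argument spaces out chunks so streaming timestamps can be asserted. A self-contained toy that mirrors the async side of this change (not litellm's actual class) might look like:

import asyncio
import time

class DelayedListIterator:
    # Toy mirror of the per-chunk delay added above; names are illustrative.
    def __init__(self, items, delay=None):
        self.items, self.index, self.delay = items, 0, delay

    def __aiter__(self):
        return self

    async def __anext__(self):
        if self.index >= len(self.items):
            raise StopAsyncIteration
        item = self.items[self.index]
        self.index += 1
        if self.delay:
            await asyncio.sleep(self.delay)  # simulate time between stream chunks
        return item

async def main():
    start = time.time()
    async for item in DelayedListIterator(["chunk-1", "chunk-2"], delay=0.1):
        print(f"{item} at +{time.time() - start:.2f}s")

asyncio.run(main())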


@@ -4453,6 +4453,42 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": { "gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 2097152, "max_input_tokens": 2097152,
@ -10189,6 +10225,22 @@
"litellm_provider": "voyage", "litellm_provider": "voyage",
"mode": "rerank" "mode": "rerank"
}, },
"databricks/databricks-claude-3-7-sonnet": {
"max_tokens": 200000,
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000025,
"input_dbu_cost_per_token": 0.00003571,
"output_cost_per_token": 0.00017857,
"output_db_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": { "databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000, "max_tokens": 128000,
"max_input_tokens": 128000, "max_input_tokens": 128000,
@ -10217,7 +10269,7 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}, "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_tool_choice": true "supports_tool_choice": true
}, },
"databricks/meta-llama-3.3-70b-instruct": { "databricks/databricks-meta-llama-3-3-70b-instruct": {
"max_tokens": 128000, "max_tokens": 128000,
"max_input_tokens": 128000, "max_input_tokens": 128000,
"max_output_tokens": 128000, "max_output_tokens": 128000,


@@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
namespace_packages = True
[mypy-google.*]
ignore_missing_imports = True

poetry.lock (generated)

@@ -1151,69 +1151,6 @@ files = [
[package.extras]
protobuf = ["grpcio-tools (>=1.70.0)"]
[[package]]
name = "grpcio"
version = "1.71.0"
description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
{file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"},
{file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"},
{file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"},
{file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"},
{file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"},
{file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"},
{file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"},
{file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"},
{file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"},
{file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"},
{file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"},
{file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"},
{file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"},
{file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"},
{file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"},
{file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"},
{file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"},
{file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"},
{file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
{file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
]
[package.extras]
protobuf = ["grpcio-tools (>=1.71.0)"]
[[package]]
name = "grpcio-status"
version = "1.70.0"
@@ -1230,22 +1167,6 @@ googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.70.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "grpcio-status"
version = "1.71.0"
description = "Status proto mapping for gRPC"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
{file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
]
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.71.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "gunicorn"
version = "23.0.0"
@@ -1678,13 +1599,13 @@ referencing = ">=0.31.0"
[[package]]
name = "litellm-proxy-extras"
version = "0.1.1"
version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
optional = true
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
files = [
{file = "litellm_proxy_extras-0.1.1-py3-none-any.whl", hash = "sha256:2b3c4c5474bacbde2424c1cd13b21f85c65e9c4346f6159badd49a210eedef5c"},
{file = "litellm_proxy_extras-0.1.2-py3-none-any.whl", hash = "sha256:2caa7bdba5a533cd1781b55e3f7c581138d2a5b68a7e6d737327669dd21d5e08"},
{file = "litellm_proxy_extras-0.1.1.tar.gz", hash = "sha256:a1eb911ad2e3742238863d314a8bd6d02dd0cc213ba040b2c0593f132fbf3117"},
{file = "litellm_proxy_extras-0.1.2.tar.gz", hash = "sha256:218e97980ab5a34eed7dcd1564a910c9a790168d672cdec3c464eba9b7cb1518"},
]
[[package]]
@@ -4135,4 +4056,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "16cbf20784776377805f5e33c6bc97dce76303132aa3d81c7e6fe743f0ee3fc1"
content-hash = "524b2f8276ba057f8dc8a79dd460c1a243ef4aece7c08a8bf344e029e07b8841"


@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.65.1"
version = "1.65.2"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true}
boto3 = {version = "1.34.34", optional = true}
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
mcp = {version = "1.5.0", optional = true, python = ">=3.10"}
litellm-proxy-extras = {version = "0.1.1", optional = true}
litellm-proxy-extras = {version = "0.1.2", optional = true}
[tool.poetry.extras]
proxy = [
@@ -117,7 +117,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.65.1"
version = "1.65.2"
version_files = [
"pyproject.toml:^version"
]


@@ -38,7 +38,7 @@ sentry_sdk==2.21.0 # for sentry error handling
detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
cryptography==43.0.1
tzdata==2025.1 # IANA time zone database
litellm-proxy-extras==0.1.1 # for proxy extras - e.g. prisma migrations
litellm-proxy-extras==0.1.2 # for proxy extras - e.g. prisma migrations
### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env


@@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
@@ -351,3 +353,4 @@ enum JobStatus {
ACTIVE
INACTIVE
}
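A hypothetical sketch of how the two new counters might be folded into the daily-spend upsert increments, mirroring the increment pattern in the batch job removed earlier in this diff; the actual write path is not part of this hunk and txn is a sample transaction:

# Hypothetical -- field names follow the schema columns above.
txn = {"prompt_tokens": 100, "completion_tokens": 50, "spend": 0.25,
       "api_requests": 1, "successful_requests": 1, "failed_requests": 0}
update_data = {
    "prompt_tokens": {"increment": txn["prompt_tokens"]},
    "completion_tokens": {"increment": txn["completion_tokens"]},
    "spend": {"increment": txn["spend"]},
    "api_requests": {"increment": txn["api_requests"]},
    "successful_requests": {"increment": txn.get("successful_requests", 0)},
    "failed_requests": {"increment": txn.get("failed_requests", 0)},
}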


@@ -1,6 +1,7 @@
import json
import os
import sys
import time
from unittest.mock import MagicMock, Mock, patch
import pytest
@@ -19,6 +20,7 @@ from litellm.types.utils import (
Delta,
ModelResponseStream,
PromptTokensDetailsWrapper,
StandardLoggingPayload,
StreamingChoices,
Usage,
)
@@ -36,6 +38,22 @@ def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
return streaming_handler
@pytest.fixture
def logging_obj() -> Logging:
import time
logging_obj = Logging(
model="my-random-model",
messages=[{"role": "user", "content": "Hey"}],
stream=True,
call_type="completion",
start_time=time.time(),
litellm_call_id="12345",
function_id="1245",
)
return logging_obj
bedrock_chunks = [
ModelResponseStream(
id="chatcmpl-d249def8-a78b-464c-87b5-3a6f43565292",
@@ -577,3 +595,36 @@ def test_streaming_handler_with_stop_chunk(
**args, model_response=ModelResponseStream()
)
assert returned_chunk is None
@pytest.mark.asyncio
async def test_streaming_completion_start_time(logging_obj: Logging):
"""Test that the start time is set correctly"""
from litellm.integrations.custom_logger import CustomLogger
class MockCallback(CustomLogger):
pass
mock_callback = MockCallback()
litellm.success_callback = [mock_callback, "langfuse"]
completion_stream = ModelResponseListIterator(
model_responses=bedrock_chunks, delay=0.1
)
response = CustomStreamWrapper(
completion_stream=completion_stream,
model="bedrock/claude-3-5-sonnet-20240620-v1:0",
logging_obj=logging_obj,
)
async for chunk in response:
print(chunk)
await asyncio.sleep(2)
assert logging_obj.model_call_details["completion_start_time"] is not None
assert (
logging_obj.model_call_details["completion_start_time"]
< logging_obj.model_call_details["end_time"]
)


@@ -0,0 +1,81 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.openrouter.chat.transformation import (
OpenRouterChatCompletionStreamingHandler,
OpenRouterException,
)
class TestOpenRouterChatCompletionStreamingHandler:
def test_chunk_parser_successful(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test input chunk
chunk = {
"id": "test_id",
"created": 1234567890,
"model": "test_model",
"choices": [
{"delta": {"content": "test content", "reasoning": "test reasoning"}}
],
}
# Parse chunk
result = handler.chunk_parser(chunk)
# Verify response
assert result.id == "test_id"
assert result.object == "chat.completion.chunk"
assert result.created == 1234567890
assert result.model == "test_model"
assert len(result.choices) == 1
assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning"
def test_chunk_parser_error_response(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test error chunk
error_chunk = {
"error": {
"message": "test error",
"code": 400,
"metadata": {"key": "value"},
"user_id": "test_user",
}
}
# Verify error handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(error_chunk)
assert "Message: test error" in str(exc_info.value)
assert exc_info.value.status_code == 400
def test_chunk_parser_key_error(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test invalid chunk missing required fields
invalid_chunk = {"incomplete": "data"}
# Verify KeyError handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(invalid_chunk)
assert "KeyError" in str(exc_info.value)
assert exc_info.value.status_code == 400


@@ -0,0 +1,97 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(0, os.path.abspath("../../../../.."))
from litellm.llms.sagemaker.common_utils import AWSEventStreamDecoder
@pytest.mark.asyncio
async def test_aiter_bytes_unicode_decode_error():
"""
Test that AWSEventStreamDecoder.aiter_bytes() does not raise an error when encountering invalid UTF-8 bytes. (UnicodeDecodeError)
Ensures stream processing continues despite the error.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
# Create an instance of AWSEventStreamDecoder
decoder = AWSEventStreamDecoder(model="test-model")
# Create a mock event that will trigger a UnicodeDecodeError
mock_event = MagicMock()
mock_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes
}
# Create a mock EventStreamBuffer that yields our mock event
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_event]
# Mock the EventStreamBuffer class
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
# Create an async generator that yields some test bytes
async def mock_iterator():
yield b""
# Process the stream
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
print("chunk=", chunk)
chunks.append(chunk)
# Verify that processing continued despite the error
# The chunks list should be empty since we only sent invalid data
assert len(chunks) == 0
@pytest.mark.asyncio
async def test_aiter_bytes_valid_chunk_followed_by_unicode_error():
"""
Test that valid chunks are processed correctly even when followed by Unicode decode errors.
This ensures errors don't corrupt or prevent processing of valid data that came before.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
decoder = AWSEventStreamDecoder(model="test-model")
# Create two mock events - first valid, then invalid
mock_valid_event = MagicMock()
mock_valid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": json.dumps({"token": {"text": "hello"}}).encode(), # Valid data first
}
mock_invalid_event = MagicMock()
mock_invalid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes second
}
# Create a mock EventStreamBuffer that yields valid event first, then invalid
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_valid_event, mock_invalid_event]
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
async def mock_iterator():
yield b"test_bytes"
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
chunks.append(chunk)
# Verify we got our valid chunk despite the subsequent error
assert len(chunks) == 1
assert chunks[0]["text"] == "hello" # Verify the content of the valid chunk


@@ -1,137 +0,0 @@
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
def test_anthropic_prompt_caching_headers_for_vertex():
"""
Test that the prompt caching beta header is correctly set for Vertex AI requests
with Anthropic models when cache control is present in the messages.
"""
# Create an instance of AnthropicConfig
config = AnthropicConfig()
# Test case 1: Vertex request with prompt caching
# Create a message with cache control
messages = [
{
"role": "system",
"content": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in messages"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
# Test case 2: Vertex request without prompt caching
messages_without_cache = [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages_without_cache)
assert is_cache_control_set is False, "Cache control should not be detected in messages"
# Generate headers for a Vertex AI request without prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is not set
assert "anthropic-beta" not in headers, "anthropic-beta header should not be present"
def test_anthropic_prompt_caching_with_content_blocks():
"""
Test that prompt caching is correctly detected when cache control is in content blocks.
"""
config = AnthropicConfig()
# Message with cache control in content blocks
messages = [
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
}
]
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in content blocks"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
def test_anthropic_vertex_other_beta_headers():
"""
Test that other beta headers are not included for Vertex AI requests.
"""
config = AnthropicConfig()
# Generate headers with multiple beta features
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=True,
computer_tool_used=True, # This should be excluded for Vertex
pdf_used=True, # This should be excluded for Vertex
is_vertex_request=True
)
# Verify that only prompt-caching is included in the beta header
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert headers["anthropic-beta"] == "prompt-caching-2024-07-31", "Only prompt-caching should be in the beta header"
assert "computer-use-2024-10-22" not in headers["anthropic-beta"], "computer-use beta should not be included"
assert "pdfs-2024-09-25" not in headers["anthropic-beta"], "pdfs beta should not be included"


@@ -39,7 +39,7 @@ async def test_request_body_caching():
result1 = await _read_request_body(mock_request)
assert result1 == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
# Verify the body was read once
mock_request.body.assert_called_once()
@@ -49,7 +49,7 @@ async def test_request_body_caching():
# Second call should use the cached body
result2 = await _read_request_body(mock_request)
assert result2 == test_data
assert result2 == {"key": "value"}
# Verify the body was not read again
mock_request.body.assert_not_called()
@@ -75,7 +75,10 @@ async def test_form_data_parsing():
# Verify the form data was correctly parsed
assert result == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (
("name", "message"),
{"name": "test_user", "message": "hello world"},
)
# Verify form() was called
mock_request.form.assert_called_once()
@@ -101,7 +104,46 @@ async def test_empty_request_body():
# Verify an empty dict is returned
assert result == {}
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == {}
assert mock_request.scope["parsed_body"] == ((), {})
# Verify the body was read
mock_request.body.assert_called_once()
@pytest.mark.asyncio
async def test_circular_reference_handling():
"""
Test that cached request body isn't modified when the returned result is modified.
Demonstrates the mutable dictionary reference issue.
"""
# Create a mock request with initial data
mock_request = MagicMock()
initial_body = {
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}],
}
mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
mock_request.headers = {"content-type": "application/json"}
mock_request.scope = {}
# First parse
result = await _read_request_body(mock_request)
# Verify initial parse
assert result["model"] == "gpt-4"
assert result["messages"] == [{"role": "user", "content": "Hello"}]
# Modify the result by adding proxy_server_request
result["proxy_server_request"] = {
"url": "http://0.0.0.0:4000/v1/chat/completions",
"method": "POST",
"headers": {"content-type": "application/json"},
"body": result, # Creates circular reference
}
# Second parse using the same request - will use the modified cached value
result2 = await _read_request_body(mock_request)
assert (
"proxy_server_request" not in result2
) # This will pass, showing the cache pollution
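The assertions above imply the parsed body is cached as an immutable (keys, values) snapshot and rebuilt on each read. A self-contained sketch of that idea (not litellm's actual helper; names are illustrative) might be:

# Hypothetical sketch: cache an immutable snapshot in the scope dict and hand back
# a fresh dict on every read, so caller mutations can't pollute the cache.
def read_request_body_cached(scope: dict, parse_fn) -> dict:
    if "parsed_body" not in scope:
        parsed = parse_fn()
        scope["parsed_body"] = (tuple(parsed.keys()), dict(parsed))
    keys, values = scope["parsed_body"]
    return {k: values[k] for k in keys}

scope: dict = {}
body1 = read_request_body_cached(scope, lambda: {"model": "gpt-4"})
body1["proxy_server_request"] = {"body": body1}  # caller adds a circular reference
body2 = read_request_body_cached(scope, lambda: {"model": "gpt-4"})
assert "proxy_server_request" not in body2  # cached snapshot stayed clean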


@@ -0,0 +1,264 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import (
DailyUserSpendTransaction,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def daily_spend_update_queue():
return DailySpendUpdateQueue()
@pytest.mark.asyncio
async def test_empty_queue_flush(daily_spend_update_queue):
"""Test flushing an empty queue returns an empty list"""
result = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert result == []
@pytest.mark.asyncio
async def test_add_single_update(daily_spend_update_queue):
"""Test adding a single update to the queue"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add update to queue
await daily_spend_update_queue.add_update({test_key: test_transaction})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 1
assert test_key in updates[0]
assert updates[0][test_key] == test_transaction
@pytest.mark.asyncio
async def test_add_multiple_updates(daily_spend_update_queue):
"""Test adding multiple updates to the queue"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key1: test_transaction1})
await daily_spend_update_queue.add_update({test_key2: test_transaction2})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 2
# Find each transaction in the list of updates
found_transaction1 = False
found_transaction2 = False
for update in updates:
if test_key1 in update:
assert update[test_key1] == test_transaction1
found_transaction1 = True
if test_key2 in update:
assert update[test_key2] == test_transaction2
found_transaction2 = True
assert found_transaction1
assert found_transaction2
@pytest.mark.asyncio
async def test_aggregated_daily_spend_update_empty(daily_spend_update_queue):
"""Test aggregating updates from an empty queue"""
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert result == {}
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_single_key():
"""Test static method for aggregating a single key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key: test_transaction}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == test_transaction
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_multiple_keys():
"""Test static method for aggregating multiple different keys"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key1: test_transaction1}, {test_key2: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 2
assert test_key1 in result
assert test_key2 in result
assert result[test_key1] == test_transaction1
assert result[test_key2] == test_transaction2
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_same_key():
"""Test static method for aggregating updates with the same key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction
@pytest.mark.asyncio
async def test_flush_and_get_aggregated_daily_spend_update_transactions(
daily_spend_update_queue,
):
"""Test the full workflow of adding, flushing, and aggregating updates"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key: test_transaction1})
await daily_spend_update_queue.add_update({test_key: test_transaction2})
# Test full workflow
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction


@@ -12,7 +12,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
# Mock Prisma client class


@@ -0,0 +1,152 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def spend_queue():
return SpendUpdateQueue()
@pytest.mark.asyncio
async def test_add_update(spend_queue):
# Test adding a single update
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
# Verify update was added by checking queue size
assert spend_queue.update_queue.qsize() == 1
@pytest.mark.asyncio
async def test_missing_response_cost(spend_queue):
# Test with missing response_cost - should default to 0
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should have created entry with 0 cost
assert aggregated["user_list_transactions"]["user123"] == 0
@pytest.mark.asyncio
async def test_missing_entity_id(spend_queue):
# Test with missing entity_id - should default to empty string
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"response_cost": 1.0,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should use empty string as key
assert aggregated["user_list_transactions"][""] == 1.0
@pytest.mark.asyncio
async def test_none_values(spend_queue):
# Test with None values
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": None, # type: ignore
"response_cost": None,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should handle None values gracefully
assert aggregated["user_list_transactions"][""] == 0
@pytest.mark.asyncio
async def test_multiple_updates_with_missing_fields(spend_queue):
# Test multiple updates with various missing fields
updates: list[SpendUpdateQueueItem] = [
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
},
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123", # missing response_cost
},
{
"entity_type": Litellm_EntityType.USER, # missing entity_id
"response_cost": 1.5,
},
]
for update in updates:
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Verify aggregation
assert (
aggregated["user_list_transactions"]["user123"] == 0.5
) # only the first update with valid cost
assert (
aggregated["user_list_transactions"][""] == 1.5
) # update with missing entity_id
@pytest.mark.asyncio
async def test_unknown_entity_type(spend_queue):
# Test with unknown entity type
update: SpendUpdateQueueItem = {
"entity_type": "UNKNOWN_TYPE", # type: ignore
"entity_id": "123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore unknown entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())
@pytest.mark.asyncio
async def test_missing_entity_type(spend_queue):
# Test with missing entity type
update: SpendUpdateQueueItem = {"entity_id": "123", "response_cost": 0.5}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore updates without entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())


@@ -55,3 +55,30 @@ async def test_ui_view_users_with_null_email(mocker, caplog):
assert response == [
LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None)
]
def test_user_daily_activity_types():
"""
Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
"""
from litellm.proxy.management_endpoints.internal_user_endpoints import (
DailySpendMetadata,
SpendMetrics,
)
# Create a SpendMetrics instance
spend_metrics = SpendMetrics()
# Create a DailySpendMetadata instance
daily_spend_metadata = DailySpendMetadata()
# Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
for field in spend_metrics.__dict__:
if field.startswith("total_"):
assert hasattr(
daily_spend_metadata, field
), f"Field {field} is not reported in DailySpendMetadata"
else:
assert not hasattr(
daily_spend_metadata, field
), f"Field {field} is reported in DailySpendMetadata"


@@ -0,0 +1,105 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import (
_get_enforced_params,
check_if_token_is_service_account,
)
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
def test_check_if_token_is_service_account():
"""
Test that only keys with `service_account_id` in metadata are considered service accounts
"""
# Test case 1: Service account token
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
assert check_if_token_is_service_account(service_account_token) == True
# Test case 2: Regular user token
regular_token = UserAPIKeyAuth(api_key="test-key", metadata={})
assert check_if_token_is_service_account(regular_token) == False
# Test case 3: Token with other metadata
other_metadata_token = UserAPIKeyAuth(
api_key="test-key", metadata={"user_id": "test-user"}
)
assert check_if_token_is_service_account(other_metadata_token) == False
def test_get_enforced_params_for_service_account_settings():
"""
Test that service account enforced params are only added to service account keys
"""
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
general_settings_with_service_account_settings = {
"service_account_settings": {"enforced_params": ["metadata.service"]},
}
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=service_account_token,
)
assert result == ["metadata.service"]
regular_token = UserAPIKeyAuth(
api_key="test-key", metadata={"enforced_params": ["user"]}
)
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=regular_token,
)
assert result == ["user"]
@pytest.mark.parametrize(
"general_settings, user_api_key_dict, expected_enforced_params",
[
(
{"enforced_params": ["param1", "param2"]},
UserAPIKeyAuth(
api_key="test_api_key", user_id="test_user_id", org_id="test_org_id"
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
user_id="test_user_id",
org_id="test_org_id",
metadata={"service_account_id": "test_service_account_id"},
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
metadata={
"enforced_params": ["param3", "param4"],
"service_account_id": "test_service_account_id",
},
),
["param1", "param2", "param3", "param4"],
),
],
)
def test_get_enforced_params(
general_settings, user_api_key_dict, expected_enforced_params
):
from litellm.proxy.litellm_pre_call_utils import _get_enforced_params
enforced_params = _get_enforced_params(general_settings, user_api_key_dict)
assert enforced_params == expected_enforced_params


@@ -199,6 +199,42 @@ class BaseLLMChatTest(ABC):
)
assert response is not None
def test_file_data_unit_test(self, pdf_messages):
from litellm.utils import supports_pdf_input, return_raw_request
from litellm.types.utils import CallTypes
from litellm.litellm_core_utils.prompt_templates.factory import convert_to_anthropic_image_obj
media_chunk = convert_to_anthropic_image_obj(
openai_image_url=pdf_messages,
format=None,
)
file_content = [
{"type": "text", "text": "What's this file about?"},
{
"type": "file",
"file": {
"file_data": pdf_messages,
}
},
]
image_messages = [{"role": "user", "content": file_content}]
base_completion_call_args = self.get_base_completion_call_args()
if not supports_pdf_input(base_completion_call_args["model"], None):
pytest.skip("Model does not support image input")
raw_request = return_raw_request(
endpoint=CallTypes.completion,
kwargs={**base_completion_call_args, "messages": image_messages},
)
print("RAW REQUEST", raw_request)
assert media_chunk["data"] in json.dumps(raw_request)
def test_message_with_name(self):
try:
litellm.set_verbose = True


@@ -268,7 +268,7 @@ async def test_vision_with_custom_model():
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABDElEQVRYhe2SzWqEMBRGPyQTfQxJsc5jBKGzFmlslyFIZxsCQ7sUaWd87EanpdpIrbtC71mE/NyTm9wEIAiCIAiC+N/otQBxU2Sf/aeh4enqptHXri+/yxIq63jlKCw6cXssnr3ObdzdGYFYCJ2IzHKXLygHXCB98Gm4DE+ZZemu5EisQSyZTmyg+AuzQbkezCuIy7EI0k9Ig3FtruwydY+qniqtV5yQyo8qpUIl2fc90KVzJWohWf2qu75vlw52rdfjVDHg8vLWwixW7PChqLkSyUadwfSS0uQZhEvRuIkS53uJvrK8cGWYaPwpGt8efvw+vlo8TPMzcmP8w7lrNypc1RsNgiAIgiD+Iu/RyDYhCaWrgQAAAABJRU5ErkJggg=="
},
},
],


@@ -1379,3 +1379,20 @@ def test_azure_modalities_param():
)
assert optional_params["modalities"] == ["text", "audio"]
assert optional_params["audio"] == {"type": "audio_input", "input": "test.wav"}
@pytest.mark.parametrize(
"model, provider",
[
("claude-3-7-sonnet-20240620-v1:0", "anthropic"),
("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
("invoke/anthropic.claude-3-7-sonnet-20240620-v1:0", "bedrock"),
("claude-3-7-sonnet@20250219", "vertex_ai"),
],
)
def test_anthropic_unified_reasoning_content(model, provider):
optional_params = get_optional_params(
model=model,
custom_llm_provider=provider,
reasoning_effort="high",
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}


@@ -1280,7 +1280,8 @@ def test_completion_cost_databricks(model):
resp = litellm.completion(model=model, messages=messages) # works fine
print(resp)
cost = completion_cost(completion_response=resp)
print(f"hidden_params: {resp._hidden_params}")
assert resp._hidden_params["response_cost"] > 0
@pytest.mark.parametrize(


@@ -62,6 +62,8 @@ from litellm.proxy._types import (
KeyRequest,
NewUserRequest,
UpdateKeyRequest,
SpendUpdateQueueItem,
Litellm_EntityType,
)
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -93,7 +95,13 @@ def prisma_client():
@pytest.mark.asyncio
async def test_batch_update_spend(prisma_client):
prisma_client.user_list_transactions["test-litellm-user-5"] = 23
await proxy_logging_obj.db_spend_update_writer.spend_update_queue.add_update(
SpendUpdateQueueItem(
entity_type=Litellm_EntityType.USER,
entity_id="test-litellm-user-5",
response_cost=23,
)
)
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
await litellm.proxy.proxy_server.prisma_client.connect()
