Merge branch 'BerriAI:main' into main

sajda 2025-04-02 19:56:53 +05:30 committed by GitHub
commit 75f41a2d64
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
134 changed files with 3935 additions and 1451 deletions

View file

@ -1450,7 +1450,7 @@ jobs:
command: |
pwd
ls
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/spend_tracking_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
no_output_timeout: 120m
# Store test results
@ -1743,6 +1743,96 @@ jobs:
# Store test results
- store_test_results:
path: test-results
proxy_spend_accuracy_tests:
machine:
image: ubuntu-2204:2023.10.1
resource_class: xlarge
working_directory: ~/project
steps:
- checkout
- setup_google_dns
- run:
name: Install Docker CLI (In case it's not already installed)
command: |
sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
- run:
name: Install Python 3.9
command: |
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda init bash
source ~/.bashrc
conda create -n myenv python=3.9 -y
conda activate myenv
python --version
- run:
name: Install Dependencies
command: |
pip install "pytest==7.3.1"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
- run:
name: Build Docker image
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
- run:
name: Run Docker container
# intentionally give bad redis credentials here
# the OTEL test - should get this as a trace
command: |
docker run -d \
-p 4000:4000 \
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e REDIS_HOST=$REDIS_HOST \
-e REDIS_PASSWORD=$REDIS_PASSWORD \
-e REDIS_PORT=$REDIS_PORT \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e USE_DDTRACE=True \
-e DD_API_KEY=$DD_API_KEY \
-e DD_SITE=$DD_SITE \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/spend_tracking_config.yaml:/app/config.yaml \
my-app:latest \
--config /app/config.yaml \
--port 4000 \
--detailed_debug \
- run:
name: Install curl and dockerize
command: |
sudo apt-get update
sudo apt-get install -y curl
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
- run:
name: Start outputting logs
command: docker logs -f my-app
background: true
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 5m
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/spend_tracking_tests -x --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
# Clean up first container
- run:
name: Stop and remove first container
command: |
docker stop my-app
docker rm my-app
proxy_multi_instance_tests:
machine:
@ -2553,6 +2643,12 @@ workflows:
only:
- main
- /litellm_.*/
- proxy_spend_accuracy_tests:
filters:
branches:
only:
- main
- /litellm_.*/
- proxy_multi_instance_tests:
filters:
branches:
@ -2714,6 +2810,7 @@ workflows:
- installing_litellm_on_python
- installing_litellm_on_python_3_13
- proxy_logging_guardrails_model_info_tests
- proxy_spend_accuracy_tests
- proxy_multi_instance_tests
- proxy_store_model_in_db_tests
- proxy_build_from_pip_tests

View file

@ -24,10 +24,10 @@ jobs:
run: |
poetry install --with dev
- name: Run Black formatting check
- name: Run Black formatting
run: |
cd litellm
poetry run black . --check
poetry run black .
cd ..
- name: Run Ruff linting

View file

@ -1,2 +1,11 @@
python3 -m build
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
Note: You might need to create a MANIFEST.ini file at the repo root in case the build process fails.
Place this in MANIFEST.ini:
recursive-exclude venv *
recursive-exclude myenv *
recursive-exclude py313_env *
recursive-exclude **/.venv *

View file

@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
# /v1/messages [BETA]
LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
This currently just supports the Anthropic API.
## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
@ -21,9 +22,61 @@ Planned improvement:
- Bedrock Anthropic support
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
<Tabs>
<TabItem label="PROXY" value="proxy">
1. Setup config.yaml
@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
3. Test it!
```bash
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "List 5 important events in the XIX century"
}
]
"content": "Hello, can you tell me a short joke?"
}
],
"max_tokens": 4096
"max_tokens": 100
}'
```
</TabItem>
<TabItem value="sdk" label="SDK">
</Tabs>
```python
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
import asyncio
import os
# set env
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
## Request Format
---
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**
# Call the handler
async def call():
response = await anthropic_messages(
messages=messages,
api_key=api_key,
model="claude-3-haiku-20240307",
max_tokens=100,
)
#### Example request body
asyncio.run(call())
```json
{
"model": "claude-3-7-sonnet-20250219",
"max_tokens": 1024,
"messages": [
{
"role": "user",
"content": "Hello, world"
}
]
}
```
</TabItem>
</Tabs>
#### Required Fields
- **model** (string):
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
- **max_tokens** (integer):
The maximum number of tokens to generate before stopping.
_Note: The model may stop before reaching this limit; value must be greater than 1._
- **messages** (array of objects):
An ordered list of conversational turns.
Each message object must include:
- **role** (enum: `"user"` or `"assistant"`):
Specifies the speaker of the message.
- **content** (string or array of content blocks):
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
_Example equivalence:_
```json
{"role": "user", "content": "Hello, Claude"}
```
is equivalent to:
```json
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
```
#### Optional Fields
- **metadata** (object):
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
- **stop_sequences** (array of strings):
Custom sequences that, when encountered in the generated text, cause the model to stop.
- **stream** (boolean):
Indicates whether to stream the response using server-sent events.
- **system** (string or array):
A system prompt providing context or specific instructions to the model.
- **temperature** (number):
Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):
Configuration for enabling extended thinking. If enabled, it includes:
- **budget_tokens** (integer):
Minimum of 1024 tokens (and less than `max_tokens`).
- **type** (enum):
E.g., `"enabled"`.
- **tool_choice** (object):
Instructs how the model should utilize any provided tools.
- **tools** (array of objects):
Definitions for tools available to the model. Each tool includes:
- **name** (string):
The tool's name.
- **description** (string):
A detailed description of the tool.
- **input_schema** (object):
A JSON schema describing the expected input format for the tool.
- **top_k** (integer):
Limits sampling to the top K options.
- **top_p** (number):
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
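For illustration, several of the optional fields above can be combined in a single request. The sketch below reuses the LiteLLM Python SDK call shown earlier; the model name, API key, and parameter values are placeholders, not recommendations.
```python showLineNumbers title="Sketch: combining optional fields (values are illustrative)"
import litellm

response = await litellm.anthropic.messages.acreate(
    model="anthropic/claude-3-haiku-20240307",
    max_tokens=200,
    messages=[{"role": "user", "content": "Summarize nucleus sampling in one sentence."}],
    system="You are a concise technical assistant.",  # system prompt
    temperature=0.2,                                   # 0 < temperature < 1
    top_p=0.9,                                         # nucleus sampling cutoff
    stop_sequences=["\n\nHuman:"],                     # custom stop sequences
    metadata={"user_id": "user_123"},                  # opaque user identifier
    api_key="sk-...",                                  # placeholder key
)
```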
## Response Format
---
Responses will be in the Anthropic messages API format.
#### Example Response
```json
{
"content": [
{
"text": "Hi! My name is Claude.",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Response fields
- **content** (array of objects):
Contains the generated content blocks from the model. Each block includes:
- **type** (string):
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
- **text** (string):
The generated text from the model.
_Note: Maximum length is 5,000,000 characters._
- **citations** (array of objects or `null`):
Optional field providing citation details. Each citation includes:
- **cited_text** (string):
The excerpt being cited.
- **document_index** (integer):
An index referencing the cited document.
- **document_title** (string or `null`):
The title of the cited document.
- **start_char_index** (integer):
The starting character index for the citation.
- **end_char_index** (integer):
The ending character index for the citation.
- **type** (string):
Typically `"char_location"`.
- **id** (string):
A unique identifier for the response message.
_Note: The format and length of IDs may change over time._
- **model** (string):
Specifies the model that generated the response.
- **role** (string):
Indicates the role of the generated message. For responses, this is always `"assistant"`.
- **stop_reason** (string):
Explains why the model stopped generating text. Possible values include:
- `"end_turn"`: The model reached a natural stopping point.
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
- `"stop_sequence"`: A custom stop sequence was encountered.
- `"tool_use"`: The model invoked one or more tools.
- **stop_sequence** (string or `null`):
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
- **type** (string):
Denotes the type of response object, which is always `"message"`.
- **usage** (object):
Provides details on token usage for billing and rate limiting. This includes:
- **input_tokens** (integer):
Total number of input tokens processed.
- **output_tokens** (integer):
Total number of output tokens generated.
- **cache_creation_input_tokens** (integer or `null`):
Number of tokens used to create a cache entry.
- **cache_read_input_tokens** (integer or `null`):
Number of tokens read from the cache.
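As a minimal sketch of how these fields fit together, the snippet below walks the example response above as a plain Python dict; the field names come from the sample response, and the dict literal is illustrative rather than the output of an API call.
```python showLineNumbers title="Sketch: reading response fields (example data from above)"
# Example response in the Anthropic messages format, taken from the sample above.
response = {
    "content": [{"text": "Hi! My name is Claude.", "type": "text"}],
    "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
    "model": "claude-3-7-sonnet-20250219",
    "role": "assistant",
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "type": "message",
    "usage": {"input_tokens": 2095, "output_tokens": 503},
}

# Concatenate the generated text blocks and total the token usage.
text = "".join(b["text"] for b in response["content"] if b["type"] == "text")
total_tokens = response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
print(text, response["stop_reason"], total_tokens)
```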

View file

@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
:::info

View file

@ -1,3 +1,5 @@
import Image from '@theme/IdealImage';
# Enterprise
For companies that need SSO, user management and professional support for LiteLLM Proxy
@ -7,6 +9,8 @@ Get free 7-day trial key [here](https://www.litellm.ai/#trial)
Includes all enterprise features.
<Image img={require('../img/enterprise_vs_oss.png')} />
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)

View file

@ -1035,8 +1035,10 @@ response = completion(
"content": [
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
{
"type": "image_url",
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
},
],
}
@ -1081,8 +1083,10 @@ curl http://0.0.0.0:4000/v1/chat/completions \
"text": "You are a very professional document summarization specialist. Please summarize the given document"
},
{
"type": "image_url",
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
}
}
]

View file

@ -1168,14 +1168,22 @@ os.environ["AWS_REGION_NAME"] = ""
# pdf url
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
# Download the file
response = requests.get(url)
file_data = response.content
encoded_file = base64.b64encode(file_data).decode("utf-8")
# model
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
image_content = [
{"type": "text", "text": "What's this file about?"},
{
"type": "image_url",
"image_url": image_url, # OR {"url": image_url}
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
},
]
@ -1221,8 +1229,10 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
"messages": [
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
{
"type": "image_url",
"image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
"type": "file",
"file": {
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
}
}
]
}'

View file

@ -365,7 +365,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
</Tabs>
## Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass [safety settigns](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
```python
response = completion(

View file

@ -82,7 +82,7 @@ from litellm import completion
os.environ["XAI_API_KEY"] = "your-api-key"
response = completion(
model="xai/grok-2-latest",
model="xai/grok-2-vision-latest",
messages=[
{
"role": "user",

View file

@ -23,6 +23,12 @@ In the newly created guard's page, you can find a reference to the prompt policy
You can decide which detections will be enabled, and set the threshold for each detection.
:::info
When using LiteLLM with virtual keys, key-specific policies can be set directly in Aim's guards page by specifying the virtual key alias when creating the guard.
Only the aliases of your virtual keys (and not the actual key secrets) will be sent to Aim.
:::
### 3. Add Aim Guardrail on your LiteLLM config.yaml
Define your guardrails under the `guardrails` section

View file

@ -17,6 +17,14 @@ model_list:
api_key: os.environ/OPENAI_API_KEY
guardrails:
- guardrail_name: general-guard
litellm_params:
guardrail: aim
mode: [pre_call, post_call]
api_key: os.environ/AIM_API_KEY
api_base: os.environ/AIM_API_BASE
default_on: true # Optional
- guardrail_name: "aporia-pre-guard"
litellm_params:
guardrail: aporia # supported values: "aporia", "lakera"
@ -45,6 +53,7 @@ guardrails:
- `pre_call` Run **before** LLM call, on **input**
- `post_call` Run **after** LLM call, on **input & output**
- `during_call` Run **during** LLM call, on **input**. Same as `pre_call` but runs in parallel with the LLM call. Response not returned until guardrail check completes
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`
## 2. Start LiteLLM Gateway
@ -569,4 +578,4 @@ guardrails: Union[
class DynamicGuardrailParams:
extra_body: Dict[str, Any] # Additional parameters for the guardrail
```
```

Binary file not shown.


View file

@ -137,15 +137,17 @@ const sidebars = {
label: "[Beta] Guardrails",
items: [
"proxy/guardrails/quick_start",
"proxy/guardrails/aim_security",
"proxy/guardrails/aporia_api",
"proxy/guardrails/bedrock",
"proxy/guardrails/guardrails_ai",
"proxy/guardrails/lakera_ai",
"proxy/guardrails/pii_masking_v2",
"proxy/guardrails/secret_detection",
"proxy/guardrails/custom_guardrail",
"prompt_injection"
...[
"proxy/guardrails/aim_security",
"proxy/guardrails/aporia_api",
"proxy/guardrails/bedrock",
"proxy/guardrails/guardrails_ai",
"proxy/guardrails/lakera_ai",
"proxy/guardrails/pii_masking_v2",
"proxy/guardrails/secret_detection",
"proxy/guardrails/custom_guardrail",
"proxy/guardrails/prompt_injection",
].sort(),
],
},
{

Binary file not shown.

View file

@ -0,0 +1,4 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "failed_requests" INTEGER NOT NULL DEFAULT 0,
ADD COLUMN "successful_requests" INTEGER NOT NULL DEFAULT 0;

litellm-proxy-extras/poetry.lock generated Normal file (7 lines)
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
package = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "2cf39473e67ff0615f0a61c9d2ac9f02b38cc08cbb1bdb893d89bee002646623"

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.1.1"
version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "0.1.1"
version = "0.1.2"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",

View file

@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
### ADAPTERS ###
from .types.adapter import AdapterItem
import litellm.anthropic_interface as anthropic
adapters: List[AdapterItem] = []

View file

@ -214,7 +214,7 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
# Set up the Sentinel client
sentinel = redis.Sentinel(
sentinel_nodes,
sentinel_nodes,
socket_timeout=0.1,
password=sentinel_password,
)

View file

@ -3,4 +3,4 @@ import importlib_metadata
try:
version = importlib_metadata.version("litellm")
except Exception:
pass
version = "unknown"

View file

@ -0,0 +1,6 @@
"""
Anthropic module for LiteLLM
"""
from .messages import acreate, create
__all__ = ["acreate", "create"]

View file

@ -0,0 +1,117 @@
"""
Interface for Anthropic's messages API
Use this to call LLMs in Anthropic /messages Request/Response format
This is an __init__.py file to allow the following interface
- litellm.messages.acreate
- litellm.messages.create
"""
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
anthropic_messages as _async_anthropic_messages,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
async def acreate(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
return await _async_anthropic_messages(
max_tokens=max_tokens,
messages=messages,
model=model,
metadata=metadata,
stop_sequences=stop_sequences,
stream=stream,
system=system,
temperature=temperature,
thinking=thinking,
tool_choice=tool_choice,
tools=tools,
top_k=top_k,
top_p=top_p,
**kwargs,
)
async def create(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = 1.0,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
**kwargs
) -> Union[AnthropicMessagesResponse, Iterator]:
"""
Async wrapper for Anthropic's messages API
Args:
max_tokens (int): Maximum tokens to generate (required)
messages (List[Dict]): List of message objects with role and content (required)
model (str): Model name to use (required)
metadata (Dict, optional): Request metadata
stop_sequences (List[str], optional): Custom stop sequences
stream (bool, optional): Whether to stream the response
system (str, optional): System prompt
temperature (float, optional): Sampling temperature (0.0 to 1.0)
thinking (Dict, optional): Extended thinking configuration
tool_choice (Dict, optional): Tool choice configuration
tools (List[Dict], optional): List of tool definitions
top_k (int, optional): Top K sampling parameter
top_p (float, optional): Nucleus sampling parameter
**kwargs: Additional arguments
Returns:
Dict: Response from the API
"""
raise NotImplementedError("This function is not implemented")

View file

@ -0,0 +1,116 @@
## Use LLM API endpoints in Anthropic Interface
Note: This module is called `anthropic_interface` because `anthropic` is a known Python package name, and reusing it was failing mypy type checking.
## Usage
---
### LiteLLM Python SDK
#### Non-streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
Example response:
```json
{
"content": [
{
"text": "Hi! this is a very short joke",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-7-sonnet-20250219",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 2095,
"output_tokens": 503,
"cache_creation_input_tokens": 2095,
"cache_read_input_tokens": 0
}
}
```
#### Streaming example
```python showLineNumbers title="Example using LiteLLM Python SDK"
import litellm
response = await litellm.anthropic.messages.acreate(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
api_key=api_key,
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
stream=True,
)
async for chunk in response:
print(chunk)
```
### LiteLLM Proxy Server
1. Setup config.yaml
```yaml
model_list:
- model_name: anthropic-claude
litellm_params:
model: claude-3-7-sonnet-latest
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
<Tabs>
<TabItem label="Anthropic Python SDK" value="python">
```python showLineNumbers title="Example using LiteLLM Proxy Server"
import anthropic
# point anthropic sdk to litellm proxy
client = anthropic.Anthropic(
base_url="http://0.0.0.0:4000",
api_key="sk-1234",
)
response = client.messages.create(
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
model="anthropic/claude-3-haiku-20240307",
max_tokens=100,
)
```
</TabItem>
<TabItem label="curl" value="curl">
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H 'x-api-key: $LITELLM_API_KEY' \
-H 'anthropic-version: 2023-06-01' \
-d '{
"model": "anthropic-claude",
"messages": [
{
"role": "user",
"content": "Hello, can you tell me a short joke?"
}
],
"max_tokens": 100
}'
```

View file

@ -19,6 +19,7 @@ DEFAULT_IMAGE_HEIGHT = 300
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.

View file

@ -550,6 +550,7 @@ def completion_cost( # noqa: PLR0915
custom_pricing: Optional[bool] = None,
base_model: Optional[str] = None,
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm.
@ -602,7 +603,7 @@ def completion_cost( # noqa: PLR0915
completion_response=completion_response
)
rerank_billed_units: Optional[RerankBilledUnits] = None
model = _select_model_name_for_cost_calc(
selected_model = _select_model_name_for_cost_calc(
model=model,
completion_response=completion_response,
custom_llm_provider=custom_llm_provider,
@ -610,232 +611,268 @@ def completion_cost( # noqa: PLR0915
base_model=base_model,
)
verbose_logger.info(f"selected model name for cost calculation: {model}")
potential_model_names = [selected_model]
if model is not None:
potential_model_names.append(model)
if completion_response is not None and (
isinstance(completion_response, BaseModel)
or isinstance(completion_response, dict)
): # tts returns a custom class
if isinstance(completion_response, dict):
usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
"usage", {}
)
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
usage_obj=usage_obj
):
setattr(
completion_response,
"usage",
litellm.Usage(**usage_obj.model_dump()),
)
if usage_obj is None:
_usage = {}
elif isinstance(usage_obj, BaseModel):
_usage = usage_obj.model_dump()
else:
_usage = usage_obj
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
_usage = (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
_usage
).model_dump()
)
# get input/output tokens from completion_response
prompt_tokens = _usage.get("prompt_tokens", 0)
completion_tokens = _usage.get("completion_tokens", 0)
cache_creation_input_tokens = _usage.get("cache_creation_input_tokens", 0)
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
if (
"prompt_tokens_details" in _usage
and _usage["prompt_tokens_details"] != {}
and _usage["prompt_tokens_details"]
):
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
total_time = getattr(completion_response, "_response_ms", 0)
hidden_params = getattr(completion_response, "_hidden_params", None)
if hidden_params is not None:
custom_llm_provider = hidden_params.get(
"custom_llm_provider", custom_llm_provider or None
)
region_name = hidden_params.get("region_name", region_name)
size = hidden_params.get("optional_params", {}).get(
"size", "1024-x-1024"
) # openai default
quality = hidden_params.get("optional_params", {}).get(
"quality", "standard"
) # openai default
n = hidden_params.get("optional_params", {}).get(
"n", 1
) # openai default
else:
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if len(messages) > 0:
prompt_tokens = token_counter(model=model, messages=messages)
elif len(prompt) > 0:
prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion)
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if custom_llm_provider is None:
for idx, model in enumerate(potential_model_names):
try:
model, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
) # strip the llm provider from the model name -> for image gen cost calculation
verbose_logger.info(
f"selected model name for cost calculation: {model}"
)
if completion_response is not None and (
isinstance(completion_response, BaseModel)
or isinstance(completion_response, dict)
): # tts returns a custom class
if isinstance(completion_response, dict):
usage_obj: Optional[
Union[dict, Usage]
] = completion_response.get("usage", {})
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
usage_obj=usage_obj
):
setattr(
completion_response,
"usage",
litellm.Usage(**usage_obj.model_dump()),
)
if usage_obj is None:
_usage = {}
elif isinstance(usage_obj, BaseModel):
_usage = usage_obj.model_dump()
else:
_usage = usage_obj
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
_usage
).model_dump()
# get input/output tokens from completion_response
prompt_tokens = _usage.get("prompt_tokens", 0)
completion_tokens = _usage.get("completion_tokens", 0)
cache_creation_input_tokens = _usage.get(
"cache_creation_input_tokens", 0
)
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
if (
"prompt_tokens_details" in _usage
and _usage["prompt_tokens_details"] != {}
and _usage["prompt_tokens_details"]
):
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
cache_read_input_tokens = prompt_tokens_details.get(
"cached_tokens", 0
)
total_time = getattr(completion_response, "_response_ms", 0)
hidden_params = getattr(completion_response, "_hidden_params", None)
if hidden_params is not None:
custom_llm_provider = hidden_params.get(
"custom_llm_provider", custom_llm_provider or None
)
region_name = hidden_params.get("region_name", region_name)
size = hidden_params.get("optional_params", {}).get(
"size", "1024-x-1024"
) # openai default
quality = hidden_params.get("optional_params", {}).get(
"quality", "standard"
) # openai default
n = hidden_params.get("optional_params", {}).get(
"n", 1
) # openai default
else:
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if len(messages) > 0:
prompt_tokens = token_counter(model=model, messages=messages)
elif len(prompt) > 0:
prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion)
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if custom_llm_provider is None:
try:
model, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
) # strip the llm provider from the model name -> for image gen cost calculation
except Exception as e:
verbose_logger.debug(
"litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
str(e)
)
)
if (
call_type == CallTypes.image_generation.value
or call_type == CallTypes.aimage_generation.value
or call_type
== PassthroughCallTypes.passthrough_image_generation.value
):
### IMAGE GENERATION COST CALCULATION ###
if custom_llm_provider == "vertex_ai":
if isinstance(completion_response, ImageResponse):
return vertex_ai_image_cost_calculator(
model=model,
image_response=completion_response,
)
elif custom_llm_provider == "bedrock":
if isinstance(completion_response, ImageResponse):
return bedrock_image_cost_calculator(
model=model,
size=size,
image_response=completion_response,
optional_params=optional_params,
)
raise TypeError(
"completion_response must be of type ImageResponse for bedrock image cost calculation"
)
else:
return default_image_cost_calculator(
model=model,
quality=quality,
custom_llm_provider=custom_llm_provider,
n=n,
size=size,
optional_params=optional_params,
)
elif (
call_type == CallTypes.speech.value
or call_type == CallTypes.aspeech.value
):
prompt_characters = litellm.utils._count_characters(text=prompt)
elif (
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
audio_transcription_file_duration = getattr(
completion_response, "duration", 0.0
)
elif (
call_type == CallTypes.rerank.value
or call_type == CallTypes.arerank.value
):
if completion_response is not None and isinstance(
completion_response, RerankResponse
):
meta_obj = completion_response.meta
if meta_obj is not None:
billed_units = meta_obj.get("billed_units", {}) or {}
else:
billed_units = {}
rerank_billed_units = RerankBilledUnits(
search_units=billed_units.get("search_units"),
total_tokens=billed_units.get("total_tokens"),
)
search_units = (
billed_units.get("search_units") or 1
) # cohere charges per request by default.
completion_tokens = search_units
# Calculate cost based on prompt_tokens, completion_tokens
if (
"togethercomputer" in model
or "together_ai" in model
or custom_llm_provider == "together_ai"
):
# together ai prices based on size of llm
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
model = get_model_params_and_category(
model, call_type=CallTypes(call_type)
)
# replicate llms are calculated based on time for request running
# see https://replicate.com/pricing
elif (
model in litellm.replicate_models or "replicate" in model
) and model not in litellm.model_cost:
# for unmapped replicate model, default to replicate's time tracking logic
return get_replicate_completion_pricing(completion_response, total_time) # type: ignore
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if (
custom_llm_provider is not None
and custom_llm_provider == "vertex_ai"
):
# Calculate the prompt characters + response characters
if len(messages) > 0:
prompt_string = litellm.utils.get_formatted_prompt(
data={"messages": messages}, call_type="completion"
)
prompt_characters = litellm.utils._count_characters(
text=prompt_string
)
if completion_response is not None and isinstance(
completion_response, ModelResponse
):
completion_string = litellm.utils.get_response_string(
response_obj=completion_response
)
completion_characters = litellm.utils._count_characters(
text=completion_string
)
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = cost_per_token(
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
custom_llm_provider=custom_llm_provider,
response_time_ms=total_time,
region_name=region_name,
custom_cost_per_second=custom_cost_per_second,
custom_cost_per_token=custom_cost_per_token,
prompt_characters=prompt_characters,
completion_characters=completion_characters,
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
usage_object=cost_per_token_usage_object,
call_type=cast(CallTypesLiteral, call_type),
audio_transcription_file_duration=audio_transcription_file_duration,
rerank_billed_units=rerank_billed_units,
)
_final_cost = (
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
)
_final_cost += (
StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
model=model,
response_object=completion_response,
standard_built_in_tools_params=standard_built_in_tools_params,
custom_llm_provider=custom_llm_provider,
)
)
return _final_cost
except Exception as e:
verbose_logger.debug(
"litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
str(e)
"litellm.cost_calculator.py::completion_cost() - Error calculating cost for model={} - {}".format(
model, str(e)
)
)
if (
call_type == CallTypes.image_generation.value
or call_type == CallTypes.aimage_generation.value
or call_type == PassthroughCallTypes.passthrough_image_generation.value
):
### IMAGE GENERATION COST CALCULATION ###
if custom_llm_provider == "vertex_ai":
if isinstance(completion_response, ImageResponse):
return vertex_ai_image_cost_calculator(
model=model,
image_response=completion_response,
)
elif custom_llm_provider == "bedrock":
if isinstance(completion_response, ImageResponse):
return bedrock_image_cost_calculator(
model=model,
size=size,
image_response=completion_response,
optional_params=optional_params,
)
raise TypeError(
"completion_response must be of type ImageResponse for bedrock image cost calculation"
)
else:
return default_image_cost_calculator(
model=model,
quality=quality,
custom_llm_provider=custom_llm_provider,
n=n,
size=size,
optional_params=optional_params,
)
elif (
call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
):
prompt_characters = litellm.utils._count_characters(text=prompt)
elif (
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
audio_transcription_file_duration = getattr(
completion_response, "duration", 0.0
if idx == len(potential_model_names) - 1:
raise e
raise Exception(
"Unable to calculat cost for received potential model names - {}".format(
potential_model_names
)
elif (
call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
):
if completion_response is not None and isinstance(
completion_response, RerankResponse
):
meta_obj = completion_response.meta
if meta_obj is not None:
billed_units = meta_obj.get("billed_units", {}) or {}
else:
billed_units = {}
rerank_billed_units = RerankBilledUnits(
search_units=billed_units.get("search_units"),
total_tokens=billed_units.get("total_tokens"),
)
search_units = (
billed_units.get("search_units") or 1
) # cohere charges per request by default.
completion_tokens = search_units
# Calculate cost based on prompt_tokens, completion_tokens
if (
"togethercomputer" in model
or "together_ai" in model
or custom_llm_provider == "together_ai"
):
# together ai prices based on size of llm
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
model = get_model_params_and_category(model, call_type=CallTypes(call_type))
# replicate llms are calculated based on time for request running
# see https://replicate.com/pricing
elif (
model in litellm.replicate_models or "replicate" in model
) and model not in litellm.model_cost:
# for unmapped replicate model, default to replicate's time tracking logic
return get_replicate_completion_pricing(completion_response, total_time) # type: ignore
if model is None:
raise ValueError(
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
)
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
# Calculate the prompt characters + response characters
if len(messages) > 0:
prompt_string = litellm.utils.get_formatted_prompt(
data={"messages": messages}, call_type="completion"
)
prompt_characters = litellm.utils._count_characters(text=prompt_string)
if completion_response is not None and isinstance(
completion_response, ModelResponse
):
completion_string = litellm.utils.get_response_string(
response_obj=completion_response
)
completion_characters = litellm.utils._count_characters(
text=completion_string
)
(
prompt_tokens_cost_usd_dollar,
completion_tokens_cost_usd_dollar,
) = cost_per_token(
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
custom_llm_provider=custom_llm_provider,
response_time_ms=total_time,
region_name=region_name,
custom_cost_per_second=custom_cost_per_second,
custom_cost_per_token=custom_cost_per_token,
prompt_characters=prompt_characters,
completion_characters=completion_characters,
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
usage_object=cost_per_token_usage_object,
call_type=call_type,
audio_transcription_file_duration=audio_transcription_file_duration,
rerank_billed_units=rerank_billed_units,
)
_final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
_final_cost += StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
model=model,
response_object=completion_response,
standard_built_in_tools_params=standard_built_in_tools_params,
custom_llm_provider=custom_llm_provider,
)
return _final_cost
except Exception as e:
raise e
@ -897,6 +934,7 @@ def response_cost_calculator(
custom_pricing: Optional[bool] = None,
prompt: str = "",
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
litellm_model_name: Optional[str] = None,
) -> float:
"""
Returns

View file

@ -290,6 +290,7 @@ class Logging(LiteLLMLoggingBaseClass):
"input": _input,
"litellm_params": litellm_params,
"applied_guardrails": applied_guardrails,
"model": model,
}
def process_dynamic_callbacks(self):
@ -892,6 +893,7 @@ class Logging(LiteLLMLoggingBaseClass):
ResponseCompletedEvent,
],
cache_hit: Optional[bool] = None,
litellm_model_name: Optional[str] = None,
) -> Optional[float]:
"""
Calculate response cost using result + logging object variables.
@ -917,7 +919,7 @@ class Logging(LiteLLMLoggingBaseClass):
try:
response_cost_calculator_kwargs = {
"response_object": result,
"model": self.model,
"model": litellm_model_name or self.model,
"cache_hit": cache_hit,
"custom_llm_provider": self.model_call_details.get(
"custom_llm_provider", None
@ -1009,6 +1011,10 @@ class Logging(LiteLLMLoggingBaseClass):
return False
return True
def _update_completion_start_time(self, completion_start_time: datetime.datetime):
self.completion_start_time = completion_start_time
self.model_call_details["completion_start_time"] = self.completion_start_time
def _success_handler_helper_fn(
self,
result=None,

View file

@ -22,6 +22,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionFileObject,
ChatCompletionFunctionMessage,
ChatCompletionImageObject,
ChatCompletionTextObject,
@ -1455,6 +1456,25 @@ def anthropic_messages_pt( # noqa: PLR0915
user_content.append(_content_element)
elif m.get("type", "") == "document":
user_content.append(cast(AnthropicMessagesDocumentParam, m))
elif m.get("type", "") == "file":
file_message = cast(ChatCompletionFileObject, m)
file_data = file_message["file"].get("file_data")
if file_data:
image_chunk = convert_to_anthropic_image_obj(
openai_image_url=file_data,
format=file_message["file"].get("format"),
)
anthropic_document_param = (
AnthropicMessagesDocumentParam(
type="document",
source=AnthropicContentParamSource(
type="base64",
media_type=image_chunk["media_type"],
data=image_chunk["data"],
),
)
)
user_content.append(anthropic_document_param)
elif isinstance(user_message_types_block["content"], str):
_anthropic_content_text_element: AnthropicMessagesTextParam = {
"type": "text",
@ -2885,6 +2905,11 @@ class BedrockConverseMessagesProcessor:
image_url=image_url, format=format
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = await BedrockConverseMessagesProcessor._async_process_file_message(
message=cast(ChatCompletionFileObject, element)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(
@ -3054,6 +3079,45 @@ class BedrockConverseMessagesProcessor:
reasoning_content_blocks.append(bedrock_content_block)
return reasoning_content_blocks
@staticmethod
def _process_file_message(message: ChatCompletionFileObject) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
format = file_message.get("format")
return BedrockImageProcessor.process_image_sync(
image_url=cast(str, file_id or file_data), format=format
)
@staticmethod
async def _async_process_file_message(
message: ChatCompletionFileObject,
) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
format = file_message.get("format")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
return await BedrockImageProcessor.process_image_async(
image_url=cast(str, file_id or file_data), format=format
)
def _bedrock_converse_messages_pt( # noqa: PLR0915
messages: List,
@ -3126,6 +3190,13 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
format=format,
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = (
BedrockConverseMessagesProcessor._process_file_message(
message=cast(ChatCompletionFileObject, element)
)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(

View file

@ -1,5 +1,6 @@
import asyncio
import collections.abc
import datetime
import json
import threading
import time
@ -1567,6 +1568,10 @@ class CustomStreamWrapper:
if response is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
## LOGGING
executor.submit(
self.run_success_logging_and_cache_storage,
@ -1721,6 +1726,11 @@ class CustomStreamWrapper:
if processed_chunk is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
choice = processed_chunk.choices[0]
if isinstance(choice, StreamingChoices):
self.response_uptil_now += choice.delta.get("content", "") or ""

View file

@ -18,8 +18,10 @@ from litellm.types.llms.anthropic import (
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionSystemMessage,
@ -94,6 +96,7 @@ class AnthropicConfig(BaseConfig):
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]
if "claude-3-7-sonnet" in model:
@ -141,15 +144,9 @@ class AnthropicConfig(BaseConfig):
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
# Handle beta headers for Vertex AI
# We allow prompt caching beta header for Vertex, but exclude other beta headers that might cause issues
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
vertex_safe_betas = set()
# Allow prompt caching beta header for Vertex
if "prompt-caching-2024-07-31" in betas:
vertex_safe_betas.add("prompt-caching-2024-07-31")
if len(vertex_safe_betas) > 0:
headers["anthropic-beta"] = ",".join(vertex_safe_betas)
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
@ -297,6 +294,21 @@ class AnthropicConfig(BaseConfig):
new_stop = new_v
return new_stop
@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
elif reasoning_effort == "high":
return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def map_openai_params(
self,
non_default_params: dict,
@ -308,10 +320,6 @@ class AnthropicConfig(BaseConfig):
non_default_params=non_default_params
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
@ -376,7 +384,15 @@ class AnthropicConfig(BaseConfig):
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params
def _create_json_tool_call_for_response_format(

View file

@ -6,7 +6,7 @@
"""
import json
from typing import Any, AsyncIterator, Dict, Optional, Union, cast
from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
@ -60,14 +63,25 @@ class AnthropicMessagesHandler:
@client
async def anthropic_messages(
api_key: str,
max_tokens: int,
messages: List[Dict],
model: str,
stream: bool = False,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = None,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[Dict[str, Any], AsyncIterator]:
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls in the Anthropic API spec
"""
@ -129,10 +143,8 @@ async def anthropic_messages(
},
custom_llm_provider=_custom_llm_provider,
)
litellm_logging_obj.model_call_details.update(kwargs)
# Prepare request body
request_body = kwargs.copy()
request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()
@ -140,10 +152,12 @@ async def anthropic_messages(
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
litellm_logging_obj.model_call_details.update(request_body)
# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(
@ -164,7 +178,7 @@ async def anthropic_messages(
url=request_url,
headers=headers,
data=json.dumps(request_body),
stream=stream,
stream=stream or False,
)
response.raise_for_status()

View file

@ -104,7 +104,10 @@ class BaseConfig(ABC):
return type_to_response_format_param(response_format=response_format)
def is_thinking_enabled(self, non_default_params: dict) -> bool:
return non_default_params.get("thinking", {}).get("type", None) == "enabled"
return (
non_default_params.get("thinking", {}).get("type") == "enabled"
or non_default_params.get("reasoning_effort") is not None
)
def update_optional_params_with_thinking_tokens(
self, non_default_params: dict, optional_params: dict
@ -116,9 +119,9 @@ class BaseConfig(ABC):
if 'thinking' is enabled and 'max_tokens' is not specified, set 'max_tokens' to the thinking token budget + DEFAULT_MAX_TOKENS
"""
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
is_thinking_enabled = self.is_thinking_enabled(optional_params)
if is_thinking_enabled and "max_tokens" not in non_default_params:
thinking_token_budget = cast(dict, non_default_params["thinking"]).get(
thinking_token_budget = cast(dict, optional_params["thinking"]).get(
"budget_tokens", None
)
if thinking_token_budget is not None:

View file

@ -17,6 +17,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
_bedrock_converse_messages_pt,
_bedrock_tools_pt,
)
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
@ -128,6 +129,7 @@ class AmazonConverseConfig(BaseConfig):
"claude-3-7" in model
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
supported_params.append("thinking")
supported_params.append("reasoning_effort")
return supported_params
def map_tool_choice_values(
@ -218,9 +220,7 @@ class AmazonConverseConfig(BaseConfig):
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
for param, value in non_default_params.items():
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]
@ -297,6 +297,14 @@ class AmazonConverseConfig(BaseConfig):
optional_params["tool_choice"] = _tool_choice_value
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params

View file

@ -12,6 +12,7 @@ import httpx
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.openrouter import OpenRouterErrorMessage
from litellm.types.utils import ModelResponse, ModelResponseStream
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@ -71,6 +72,24 @@ class OpenrouterConfig(OpenAIGPTConfig):
class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
## HANDLE ERROR IN CHUNK ##
if "error" in chunk:
error_chunk = chunk["error"]
error_message = OpenRouterErrorMessage(
message="Message: {}, Metadata: {}, User ID: {}".format(
error_chunk["message"],
error_chunk.get("metadata", {}),
error_chunk.get("user_id", ""),
),
code=error_chunk["code"],
metadata=error_chunk.get("metadata", {}),
)
raise OpenRouterException(
message=error_message["message"],
status_code=error_message["code"],
headers=error_message["metadata"].get("headers", {}),
)
new_choices = []
for choice in chunk["choices"]:
choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")

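A hedged, self-contained sketch of the new error-chunk path in the streaming handler above. OpenRouterStreamError stands in for OpenRouterException, and the sample chunk is illustrative.

class OpenRouterStreamError(Exception):  # local stand-in for OpenRouterException
    def __init__(self, message: str, status_code: int, headers: dict):
        super().__init__(message)
        self.status_code = status_code
        self.headers = headers


def parse_chunk(chunk: dict) -> dict:
    # Mirror the chunk_parser change: an 'error' object in a streamed chunk is
    # raised as an exception instead of being silently dropped.
    if "error" in chunk:
        error_chunk = chunk["error"]
        message = "Message: {}, Metadata: {}, User ID: {}".format(
            error_chunk["message"],
            error_chunk.get("metadata", {}),
            error_chunk.get("user_id", ""),
        )
        raise OpenRouterStreamError(
            message=message,
            status_code=error_chunk["code"],
            headers=error_chunk.get("metadata", {}).get("headers", {}),
        )
    return chunk  # normal chunks flow through to the usual delta handling


sample_error_chunk = {"error": {"message": "rate limited", "code": 429, "metadata": {"headers": {}}}}
try:
    parse_chunk(sample_error_chunk)
except OpenRouterStreamError as e:
    print(e.status_code, e)  # 429 Message: rate limited, ...
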
View file

@ -127,21 +127,25 @@ class AWSEventStreamDecoder:
async for chunk in iterator:
event_stream_buffer.add_data(chunk)
for event in event_stream_buffer:
message = self._parse_message_from_event(event)
if message:
verbose_logger.debug("sagemaker parsed chunk bytes %s", message)
# remove data: prefix and "\n\n" at the end
message = (
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
or ""
)
message = message.replace("\n\n", "")
try:
message = self._parse_message_from_event(event)
if message:
verbose_logger.debug(
"sagemaker parsed chunk bytes %s", message
)
# remove data: prefix and "\n\n" at the end
message = (
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(
message
)
or ""
)
message = message.replace("\n\n", "")
# Accumulate JSON data
accumulated_json += message
# Accumulate JSON data
accumulated_json += message
# Try to parse the accumulated JSON
try:
# Try to parse the accumulated JSON
_data = json.loads(accumulated_json)
if self.is_messages_api:
yield self._chunk_parser_messages_api(chunk_data=_data)
@ -149,9 +153,19 @@ class AWSEventStreamDecoder:
yield self._chunk_parser(chunk_data=_data)
# Reset accumulated_json after successful parsing
accumulated_json = ""
except json.JSONDecodeError:
# If it's not valid JSON yet, continue to the next event
continue
except json.JSONDecodeError:
# If it's not valid JSON yet, continue to the next event
continue
except UnicodeDecodeError as e:
verbose_logger.warning(
f"UnicodeDecodeError: {e}. Attempting to combine with next event."
)
continue
except Exception as e:
verbose_logger.error(
f"Error parsing message: {e}. Attempting to combine with next event."
)
continue
# Handle any remaining data after the iterator is exhausted
if accumulated_json:
@ -167,6 +181,8 @@ class AWSEventStreamDecoder:
f"Warning: Unparseable JSON data remained: {accumulated_json}"
)
yield None
except Exception as e:
verbose_logger.error(f"Final error parsing accumulated JSON: {e}")
def _parse_message_from_event(self, event) -> Optional[str]:
response_dict = event.to_response_dict()

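A hedged sketch of the accumulate-then-parse behaviour introduced above, using plain strings in place of AWS event-stream events: fragments are concatenated until they form valid JSON, and undecodable or partial events roll over to the next iteration instead of ending the stream.

import json
from typing import Iterable, Iterator


def iter_parsed_chunks(fragments: Iterable[str]) -> Iterator[dict]:
    accumulated_json = ""
    for fragment in fragments:
        try:
            # strip the SSE-style prefix and trailing blank line, as above
            message = fragment.removeprefix("data: ").replace("\n\n", "")
            accumulated_json += message
            try:
                data = json.loads(accumulated_json)
            except json.JSONDecodeError:
                continue  # not a complete JSON object yet; wait for the next event
            accumulated_json = ""
            yield data
        except Exception:
            continue  # mirror the new behaviour: log-and-continue rather than raise


fragments = ['data: {"token": {"te', 'xt": "hello"}}\n\n', 'data: {"token": {"text": " world"}}\n\n']
print(list(iter_parsed_chunks(fragments)))
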
View file

@ -4453,6 +4453,42 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,
@ -10189,6 +10225,22 @@
"litellm_provider": "voyage",
"mode": "rerank"
},
"databricks/databricks-claude-3-7-sonnet": {
"max_tokens": 200000,
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000025,
"input_dbu_cost_per_token": 0.00003571,
"output_cost_per_token": 0.00017857,
"output_db_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
@ -10217,7 +10269,7 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_tool_choice": true
},
"databricks/meta-llama-3.3-70b-instruct": {
"databricks/databricks-meta-llama-3-3-70b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,

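For reference, a hedged sketch of looking one of the new entries up at runtime, assuming the public litellm.model_cost mapping (which is loaded from this JSON file).

import litellm

entry = litellm.model_cost.get("gemini-2.5-pro-exp-03-25", {})
print(entry.get("litellm_provider"))      # expected: "vertex_ai-language-models"
print(entry.get("max_input_tokens"))      # expected: 1048576
print(entry.get("supports_tool_choice"))  # expected: True
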
View file

@ -3,7 +3,7 @@ MCP Client Manager
This class is responsible for managing MCP SSE clients.
This is a Proxy
"""
import asyncio

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/1f6915676624c422.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[38411,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"274\",\"static/chunks/274-bddaf0cf6c91e72f.js\",\"250\",\"static/chunks/250-dfc03a6fb4f0d254.js\",\"699\",\"static/chunks/699-87224ecba28f1f48.js\",\"931\",\"static/chunks/app/page-0f46d4a8b9bdf1c0.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Yb50LG5p7c9QpG54GIoFV\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/1f6915676624c422.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1]
3:I[38411,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","274","static/chunks/274-bddaf0cf6c91e72f.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","931","static/chunks/app/page-0f46d4a8b9bdf1c0.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-601568e45a5ffece.js","461","static/chunks/app/onboarding/page-5110f2c6a3c9a2f4.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-dfc03a6fb4f0d254.js","461","static/chunks/app/onboarding/page-2bf7a26db5342dbf.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,33 +1,39 @@
model_list:
- model_name: "gpt-4o"
litellm_params:
model: azure/chatgpt-v-2
api_key: os.environ/AZURE_API_KEY
api_base: http://0.0.0.0:8090
rpm: 3
- model_name: "gpt-4o-mini-openai"
litellm_params:
model: gpt-4o-mini
api_key: os.environ/OPENAI_API_KEY
- model_name: "openai/*"
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
- model_name: "bedrock-nova"
litellm_params:
model: us.amazon.nova-pro-v1:0
- model_name: "gemini-2.0-flash"
litellm_params:
model: gemini/gemini-2.0-flash
api_key: os.environ/GEMINI_API_KEY
- model_name: "gpt-4o"
litellm_params:
model: azure/chatgpt-v-2
api_key: os.environ/AZURE_API_KEY
api_base: http://0.0.0.0:8090
rpm: 3
- model_name: "gpt-4o-mini-openai"
litellm_params:
model: gpt-4o-mini
api_key: os.environ/OPENAI_API_KEY
- model_name: "openai/*"
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
- model_name: "bedrock-nova"
litellm_params:
model: us.amazon.nova-pro-v1:0
- model_name: "gemini-2.0-flash"
litellm_params:
model: gemini/gemini-2.0-flash
api_key: os.environ/GEMINI_API_KEY
- model_name: openrouter_model
litellm_params:
model: openrouter/openrouter_model
api_key: os.environ/OPENROUTER_API_KEY
api_base: http://0.0.0.0:8090
litellm_settings:
num_retries: 0
callbacks: ["prometheus"]
# json_logs: true
# router_settings:
# routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
# redis_host: os.environ/REDIS_HOST
# redis_password: os.environ/REDIS_PASSWORD
# redis_port: os.environ/REDIS_PORT
router_settings:
routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT

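A hedged sketch of the same routing setup expressed through the Python Router SDK, with placeholder Redis credentials and a model_list trimmed to one entry; the parameter names follow the documented Router constructor.

import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o-mini-openai",
            "litellm_params": {
                "model": "gpt-4o-mini",
                "api_key": os.environ.get("OPENAI_API_KEY"),
            },
        }
    ],
    routing_strategy="usage-based-routing-v2",  # matches the router_settings above
    redis_host=os.environ.get("REDIS_HOST"),
    redis_port=int(os.environ.get("REDIS_PORT", "6379")),
    redis_password=os.environ.get("REDIS_PASSWORD"),
    num_retries=0,
)
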
View file

@ -432,6 +432,7 @@ class LiteLLMRoutes(enum.Enum):
"/model/new",
"/model/update",
"/model/delete",
"/user/daily/activity",
] # routes that manage their own allowed/disallowed logic
## Org Admin Routes ##
@ -2736,6 +2737,8 @@ class DailyUserSpendTransaction(TypedDict):
completion_tokens: int
spend: float
api_requests: int
successful_requests: int
failed_requests: int
class DBSpendUpdateTransactions(TypedDict):
@ -2749,3 +2752,9 @@ class DBSpendUpdateTransactions(TypedDict):
team_list_transactions: Optional[Dict[str, float]]
team_member_list_transactions: Optional[Dict[str, float]]
org_list_transactions: Optional[Dict[str, float]]
class SpendUpdateQueueItem(TypedDict, total=False):
entity_type: Litellm_EntityType
entity_id: str
response_cost: Optional[float]

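A hedged, self-contained sketch of the new record shapes. The TypedDicts below are local mirrors trimmed for illustration; the canonical definitions (including model_group and custom_llm_provider) live in litellm/proxy/_types.py, and entity_type is really a Litellm_EntityType enum.

from typing import Optional, TypedDict


class SpendUpdateQueueItem(TypedDict, total=False):
    entity_type: str  # a Litellm_EntityType value in LiteLLM, e.g. "key"
    entity_id: str
    response_cost: Optional[float]


class DailyUserSpendTransaction(TypedDict):
    user_id: str
    date: str
    api_key: str
    model: str
    prompt_tokens: int
    completion_tokens: int
    spend: float
    api_requests: int
    successful_requests: int  # new field in this change
    failed_requests: int      # new field in this change


update = SpendUpdateQueueItem(
    entity_type="key", entity_id="hashed-token-abc", response_cost=0.0021
)
print(update)
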
View file

@ -1,53 +0,0 @@
"""
Checks for LiteLLM service account keys
"""
from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
async def service_account_checks(
valid_token: UserAPIKeyAuth, request_data: dict
) -> bool:
"""
If a virtual key is a service account, checks it's a valid service account
A token is a service account if it has a service_account_id in its metadata
Service Account Specific Checks:
- Check if required_params is set
"""
if check_if_token_is_service_account(valid_token) is not True:
return True
from litellm.proxy.proxy_server import general_settings
if "service_account_settings" in general_settings:
service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings:
_enforced_params = service_account_settings["enforced_params"]
for param in _enforced_params:
if param not in request_data:
raise ProxyException(
type=ProxyErrorTypes.bad_request_error.value,
code=400,
param=param,
message=f"BadRequest please pass param={param} in request body. This is a required param for service account",
)
return True

View file

@ -49,7 +49,6 @@ from litellm.proxy.auth.auth_utils import (
from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler
from litellm.proxy.auth.oauth2_check import check_oauth2_token
from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
from litellm.proxy.auth.service_account_checks import service_account_checks
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.utils import PrismaClient, ProxyLogging
from litellm.types.services import ServiceTypes
@ -905,12 +904,6 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
else:
_team_obj = None
# Check 7: Check if key is a service account key
await service_account_checks(
valid_token=valid_token,
request_data=request_data,
)
user_api_key_cache.set_cache(
key=valid_token.team_id, value=_team_obj
) # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py

View file

@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
"""
Common request processing logic for both chat completions and responses API endpoints
"""
verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
)

View file

@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
if request is None:
return None
if hasattr(request, "scope") and "parsed_body" in request.scope:
return request.scope["parsed_body"]
if (
hasattr(request, "scope")
and "parsed_body" in request.scope
and isinstance(request.scope["parsed_body"], tuple)
):
accepted_keys, parsed_body = request.scope["parsed_body"]
return {key: parsed_body[key] for key in accepted_keys}
return None
@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
try:
if request is None:
return
request.scope["parsed_body"] = parsed_body
request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
except Exception as e:
verbose_proxy_logger.debug(
"Unexpected error setting request parsed body - {}".format(e)

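A hedged sketch of the new caching round-trip, with a plain dict standing in for starlette's request.scope: the parsed body is stored as (accepted keys, dict), and reads rebuild the body from only those keys.

from typing import Optional

scope: dict = {}  # stands in for request.scope


def set_parsed_body(parsed_body: dict) -> None:
    scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)


def get_parsed_body() -> Optional[dict]:
    if "parsed_body" in scope and isinstance(scope["parsed_body"], tuple):
        accepted_keys, parsed_body = scope["parsed_body"]
        return {key: parsed_body[key] for key in accepted_keys}
    return None


set_parsed_body({"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]})
print(get_parsed_body())  # only the keys captured at set-time are returned
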
View file

@ -10,7 +10,7 @@ import os
import time
import traceback
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
import litellm
from litellm._logging import verbose_proxy_logger
@ -18,13 +18,19 @@ from litellm.caching import DualCache, RedisCache
from litellm.constants import DB_SPEND_UPDATE_JOB_NAME
from litellm.proxy._types import (
DB_CONNECTION_ERROR_TYPES,
DailyUserSpendTransaction,
DBSpendUpdateTransactions,
Litellm_EntityType,
LiteLLM_UserTable,
SpendLogsPayload,
SpendUpdateQueueItem,
)
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.redis_update_buffer import RedisUpdateBuffer
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
if TYPE_CHECKING:
from litellm.proxy.utils import PrismaClient, ProxyLogging
@ -48,10 +54,12 @@ class DBSpendUpdateWriter:
self.redis_cache = redis_cache
self.redis_update_buffer = RedisUpdateBuffer(redis_cache=self.redis_cache)
self.pod_lock_manager = PodLockManager(cronjob_id=DB_SPEND_UPDATE_JOB_NAME)
self.spend_update_queue = SpendUpdateQueue()
self.daily_spend_update_queue = DailySpendUpdateQueue()
@staticmethod
async def update_database(
# LiteLLM management object fields
self,
token: Optional[str],
user_id: Optional[str],
end_user_id: Optional[str],
@ -84,7 +92,7 @@ class DBSpendUpdateWriter:
hashed_token = token
asyncio.create_task(
DBSpendUpdateWriter._update_user_db(
self._update_user_db(
response_cost=response_cost,
user_id=user_id,
prisma_client=prisma_client,
@ -94,14 +102,14 @@ class DBSpendUpdateWriter:
)
)
asyncio.create_task(
DBSpendUpdateWriter._update_key_db(
self._update_key_db(
response_cost=response_cost,
hashed_token=hashed_token,
prisma_client=prisma_client,
)
)
asyncio.create_task(
DBSpendUpdateWriter._update_team_db(
self._update_team_db(
response_cost=response_cost,
team_id=team_id,
user_id=user_id,
@ -109,14 +117,14 @@ class DBSpendUpdateWriter:
)
)
asyncio.create_task(
DBSpendUpdateWriter._update_org_db(
self._update_org_db(
response_cost=response_cost,
org_id=org_id,
prisma_client=prisma_client,
)
)
if disable_spend_logs is False:
await DBSpendUpdateWriter._insert_spend_log_to_db(
await self._insert_spend_log_to_db(
kwargs=kwargs,
completion_response=completion_response,
start_time=start_time,
@ -135,56 +143,8 @@ class DBSpendUpdateWriter:
f"Error updating Prisma database: {traceback.format_exc()}"
)
@staticmethod
async def _update_transaction_list(
response_cost: Optional[float],
entity_id: Optional[str],
transaction_list: dict,
entity_type: Litellm_EntityType,
debug_msg: Optional[str] = None,
prisma_client: Optional[PrismaClient] = None,
) -> bool:
"""
Common helper method to update a transaction list for an entity
Args:
response_cost: The cost to add
entity_id: The ID of the entity to update
transaction_list: The transaction list dictionary to update
entity_type: The type of entity (from EntityType enum)
debug_msg: Optional custom debug message
Returns:
bool: True if update happened, False otherwise
"""
try:
if debug_msg:
verbose_proxy_logger.debug(debug_msg)
else:
verbose_proxy_logger.debug(
f"adding spend to {entity_type.value} db. Response cost: {response_cost}. {entity_type.value}_id: {entity_id}."
)
if prisma_client is None:
return False
if entity_id is None:
verbose_proxy_logger.debug(
f"track_cost_callback: {entity_type.value}_id is None. Not tracking spend for {entity_type.value}"
)
return False
transaction_list[entity_id] = response_cost + transaction_list.get(
entity_id, 0
)
return True
except Exception as e:
verbose_proxy_logger.info(
f"Update {entity_type.value.capitalize()} DB failed to execute - {str(e)}\n{traceback.format_exc()}"
)
raise e
@staticmethod
async def _update_key_db(
self,
response_cost: Optional[float],
hashed_token: Optional[str],
prisma_client: Optional[PrismaClient],
@ -193,13 +153,12 @@ class DBSpendUpdateWriter:
if hashed_token is None or prisma_client is None:
return
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=hashed_token,
transaction_list=prisma_client.key_list_transactions,
entity_type=Litellm_EntityType.KEY,
debug_msg=f"adding spend to key db. Response cost: {response_cost}. Token: {hashed_token}.",
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.KEY,
entity_id=hashed_token,
response_cost=response_cost,
)
)
except Exception as e:
verbose_proxy_logger.exception(
@ -207,8 +166,8 @@ class DBSpendUpdateWriter:
)
raise e
@staticmethod
async def _update_user_db(
self,
response_cost: Optional[float],
user_id: Optional[str],
prisma_client: Optional[PrismaClient],
@ -234,21 +193,21 @@ class DBSpendUpdateWriter:
for _id in user_ids:
if _id is not None:
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=_id,
transaction_list=prisma_client.user_list_transactions,
entity_type=Litellm_EntityType.USER,
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.USER,
entity_id=_id,
response_cost=response_cost,
)
)
if end_user_id is not None:
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=end_user_id,
transaction_list=prisma_client.end_user_list_transactions,
entity_type=Litellm_EntityType.END_USER,
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.END_USER,
entity_id=end_user_id,
response_cost=response_cost,
)
)
except Exception as e:
verbose_proxy_logger.info(
@ -256,8 +215,8 @@ class DBSpendUpdateWriter:
+ f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}"
)
@staticmethod
async def _update_team_db(
self,
response_cost: Optional[float],
team_id: Optional[str],
user_id: Optional[str],
@ -270,12 +229,12 @@ class DBSpendUpdateWriter:
)
return
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=team_id,
transaction_list=prisma_client.team_list_transactions,
entity_type=Litellm_EntityType.TEAM,
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.TEAM,
entity_id=team_id,
response_cost=response_cost,
)
)
try:
@ -283,12 +242,12 @@ class DBSpendUpdateWriter:
if user_id is not None:
# key is "team_id::<value>::user_id::<value>"
team_member_key = f"team_id::{team_id}::user_id::{user_id}"
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=team_member_key,
transaction_list=prisma_client.team_member_list_transactions,
entity_type=Litellm_EntityType.TEAM_MEMBER,
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.TEAM_MEMBER,
entity_id=team_member_key,
response_cost=response_cost,
)
)
except Exception:
pass
@ -298,8 +257,8 @@ class DBSpendUpdateWriter:
)
raise e
@staticmethod
async def _update_org_db(
self,
response_cost: Optional[float],
org_id: Optional[str],
prisma_client: Optional[PrismaClient],
@ -311,12 +270,12 @@ class DBSpendUpdateWriter:
)
return
await DBSpendUpdateWriter._update_transaction_list(
response_cost=response_cost,
entity_id=org_id,
transaction_list=prisma_client.org_list_transactions,
entity_type=Litellm_EntityType.ORGANIZATION,
prisma_client=prisma_client,
await self.spend_update_queue.add_update(
update=SpendUpdateQueueItem(
entity_type=Litellm_EntityType.ORGANIZATION,
entity_id=org_id,
response_cost=response_cost,
)
)
except Exception as e:
verbose_proxy_logger.info(
@ -324,8 +283,8 @@ class DBSpendUpdateWriter:
)
raise e
@staticmethod
async def _insert_spend_log_to_db(
self,
kwargs: Optional[dict],
completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]],
start_time: Optional[datetime],
@ -346,7 +305,7 @@ class DBSpendUpdateWriter:
end_time=end_time,
)
payload["spend"] = response_cost or 0.0
DBSpendUpdateWriter._set_spend_logs_payload(
await self._set_spend_logs_payload(
payload=payload,
spend_logs_url=os.getenv("SPEND_LOGS_URL"),
prisma_client=prisma_client,
@ -357,8 +316,8 @@ class DBSpendUpdateWriter:
)
raise e
@staticmethod
def _set_spend_logs_payload(
async def _set_spend_logs_payload(
self,
payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient,
spend_logs_url: Optional[str] = None,
@ -377,8 +336,9 @@ class DBSpendUpdateWriter:
elif prisma_client is not None:
prisma_client.spend_log_transactions.append(payload)
prisma_client.add_spend_log_transaction_to_daily_user_transaction(
payload.copy()
await self.add_spend_log_transaction_to_daily_user_transaction(
payload=payload.copy(),
prisma_client=prisma_client,
)
return prisma_client
@ -435,7 +395,8 @@ class DBSpendUpdateWriter:
- Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB)
"""
await self.redis_update_buffer.store_in_memory_spend_updates_in_redis(
prisma_client=prisma_client,
spend_update_queue=self.spend_update_queue,
daily_spend_update_queue=self.daily_spend_update_queue,
)
# Only commit from redis to db if this pod is the leader
@ -447,12 +408,23 @@ class DBSpendUpdateWriter:
await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer()
)
if db_spend_update_transactions is not None:
await DBSpendUpdateWriter._commit_spend_updates_to_db(
await self._commit_spend_updates_to_db(
prisma_client=prisma_client,
n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions,
)
daily_spend_update_transactions = (
await self.redis_update_buffer.get_all_daily_spend_update_transactions_from_redis_buffer()
)
if daily_spend_update_transactions is not None:
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
except Exception as e:
verbose_proxy_logger.error(f"Error committing spend updates: {e}")
finally:
@ -471,23 +443,34 @@ class DBSpendUpdateWriter:
Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS.
"""
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions,
end_user_list_transactions=prisma_client.end_user_list_transactions,
key_list_transactions=prisma_client.key_list_transactions,
team_list_transactions=prisma_client.team_list_transactions,
team_member_list_transactions=prisma_client.team_member_list_transactions,
org_list_transactions=prisma_client.org_list_transactions,
# Aggregate all in memory spend updates (key, user, end_user, team, team_member, org) and commit to db
################## Spend Update Transactions ##################
db_spend_update_transactions = (
await self.spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
await DBSpendUpdateWriter._commit_spend_updates_to_db(
await self._commit_spend_updates_to_db(
prisma_client=prisma_client,
n_retry_times=n_retry_times,
proxy_logging_obj=proxy_logging_obj,
db_spend_update_transactions=db_spend_update_transactions,
)
@staticmethod
################## Daily Spend Update Transactions ##################
# Aggregate all in memory daily spend transactions and commit to db
daily_spend_update_transactions = (
await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
await DBSpendUpdateWriter.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
daily_spend_transactions=daily_spend_update_transactions,
)
async def _commit_spend_updates_to_db( # noqa: PLR0915
self,
prisma_client: PrismaClient,
n_retry_times: int,
proxy_logging_obj: ProxyLogging,
@ -526,9 +509,6 @@ class DBSpendUpdateWriter:
where={"user_id": user_id},
data={"spend": {"increment": response_cost}},
)
prisma_client.user_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break
except DB_CONNECTION_ERROR_TYPES as e:
if (
@ -561,6 +541,7 @@ class DBSpendUpdateWriter:
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
end_user_list_transactions=end_user_list_transactions,
)
### UPDATE KEY TABLE ###
key_list_transactions = db_spend_update_transactions["key_list_transactions"]
@ -583,9 +564,6 @@ class DBSpendUpdateWriter:
where={"token": token},
data={"spend": {"increment": response_cost}},
)
prisma_client.key_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break
except DB_CONNECTION_ERROR_TYPES as e:
if (
@ -632,9 +610,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id},
data={"spend": {"increment": response_cost}},
)
prisma_client.team_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break
except DB_CONNECTION_ERROR_TYPES as e:
if (
@ -684,9 +659,6 @@ class DBSpendUpdateWriter:
where={"team_id": team_id, "user_id": user_id},
data={"spend": {"increment": response_cost}},
)
prisma_client.team_member_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break
except DB_CONNECTION_ERROR_TYPES as e:
if (
@ -725,9 +697,6 @@ class DBSpendUpdateWriter:
where={"organization_id": org_id},
data={"spend": {"increment": response_cost}},
)
prisma_client.org_list_transactions = (
{}
) # Clear the remaining transactions after processing all batches in the loop.
break
except DB_CONNECTION_ERROR_TYPES as e:
if (
@ -744,3 +713,192 @@ class DBSpendUpdateWriter:
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
daily_spend_transactions: Dict[str, DailyUserSpendTransaction],
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
from litellm.proxy.utils import _raise_failed_update_spend_exception
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(len(daily_spend_transactions))
)
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(daily_spend_transactions.items())[:BATCH_SIZE]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
"successful_requests": transaction[
"successful_requests"
],
"failed_requests": transaction[
"failed_requests"
],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
"successful_requests": {
"increment": transaction[
"successful_requests"
]
},
"failed_requests": {
"increment": transaction["failed_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
daily_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
daily_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
async def add_spend_log_transaction_to_daily_user_transaction(
self,
payload: Union[dict, SpendLogsPayload],
prisma_client: PrismaClient,
):
"""
Add a spend log transaction to the `daily_spend_update_queue`
Key = @@unique([user_id, date, api_key, model, custom_llm_provider])
If key exists, update the transaction with the new spend and usage
"""
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
request_status = prisma_client.get_request_status(payload)
verbose_proxy_logger.info(f"Logged request status: {request_status}")
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
daily_transaction = DailyUserSpendTransaction(
user_id=payload["user"],
date=date,
api_key=payload["api_key"],
model=payload["model"],
model_group=payload["model_group"],
custom_llm_provider=payload["custom_llm_provider"],
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
successful_requests=1 if request_status == "success" else 0,
failed_requests=1 if request_status != "success" else 0,
)
await self.daily_spend_update_queue.add_update(
update={daily_transaction_key: daily_transaction}
)
except Exception as e:
raise e
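As a rough illustration of the method above (a sketch with example values, assuming litellm is installed and these types are importable exactly as shown in this diff), a daily transaction keyed by user, date, api_key, model, and provider can be enqueued like this; the key format mirrors the @@unique constraint on LiteLLM_DailyUserSpend:
import asyncio

from litellm.proxy._types import DailyUserSpendTransaction
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
    DailySpendUpdateQueue,
)

async def main() -> None:
    queue = DailySpendUpdateQueue()
    # Example values only; the key mirrors user_id, date, api_key, model, provider
    key = "user-1_2025-04-02_sk-hash_gpt-4o_openai"
    txn = DailyUserSpendTransaction(
        user_id="user-1",
        date="2025-04-02",
        api_key="sk-hash",
        model="gpt-4o",
        model_group="gpt-4o",
        custom_llm_provider="openai",
        prompt_tokens=100,
        completion_tokens=50,
        spend=0.002,
        api_requests=1,
        successful_requests=1,
        failed_requests=0,
    )
    await queue.add_update(update={key: txn})

asyncio.run(main())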

View file

@ -0,0 +1,25 @@
"""
Base class for an in-memory buffer for database transactions
"""
import asyncio
from litellm._logging import verbose_proxy_logger
class BaseUpdateQueue:
"""Base class for in memory buffer for database transactions"""
def __init__(self):
self.update_queue = asyncio.Queue()
async def add_update(self, update):
"""Enqueue an update."""
verbose_proxy_logger.debug("Adding update to queue: %s", update)
await self.update_queue.put(update)
async def flush_all_updates_from_in_memory_queue(self):
"""Get all updates from the queue."""
updates = []
while not self.update_queue.empty():
updates.append(await self.update_queue.get())
return updates
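A minimal usage sketch for the buffer (assuming litellm is installed; the payloads here are arbitrary examples):
import asyncio

from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue

async def main() -> None:
    queue = BaseUpdateQueue()
    await queue.add_update({"entity_id": "user-1", "response_cost": 0.01})
    await queue.add_update({"entity_id": "user-2", "response_cost": 0.02})
    # Drains everything currently buffered in memory, in FIFO order
    updates = await queue.flush_all_updates_from_in_memory_queue()
    print(updates)

asyncio.run(main())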

View file

@ -0,0 +1,95 @@
import asyncio
from typing import Dict, List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import DailyUserSpendTransaction
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class DailySpendUpdateQueue(BaseUpdateQueue):
"""
In-memory buffer for daily spend updates that should be committed to the database
To add a new daily spend update transaction, use the following format:
daily_spend_update_queue.add_update({
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
}
})
The queue contains a list of daily spend update transactions, e.g.:
queue = [
{
"user1_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
},
{
"user2_date_api_key_model_custom_llm_provider": {
"spend": 10,
"prompt_tokens": 100,
"completion_tokens": 100,
"api_requests": 100,
"successful_requests": 100,
"failed_requests": 100,
}
}
]
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[
Dict[str, DailyUserSpendTransaction]
] = asyncio.Queue()
async def flush_and_get_aggregated_daily_spend_update_transactions(
self,
) -> Dict[str, DailyUserSpendTransaction]:
"""Get all updates from the queue and return all updates aggregated by daily_transaction_key."""
updates = await self.flush_all_updates_from_in_memory_queue()
aggregated_daily_spend_update_transactions = (
DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
)
verbose_proxy_logger.debug(
"Aggregated daily spend update transactions: %s",
aggregated_daily_spend_update_transactions,
)
return aggregated_daily_spend_update_transactions
@staticmethod
def get_aggregated_daily_spend_update_transactions(
updates: List[Dict[str, DailyUserSpendTransaction]]
) -> Dict[str, DailyUserSpendTransaction]:
"""Aggregate updates by daily_transaction_key."""
aggregated_daily_spend_update_transactions: Dict[
str, DailyUserSpendTransaction
] = {}
for _update in updates:
for _key, payload in _update.items():
if _key in aggregated_daily_spend_update_transactions:
daily_transaction = aggregated_daily_spend_update_transactions[_key]
daily_transaction["spend"] += payload["spend"]
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
daily_transaction["completion_tokens"] += payload[
"completion_tokens"
]
daily_transaction["api_requests"] += payload["api_requests"]
daily_transaction["successful_requests"] += payload[
"successful_requests"
]
daily_transaction["failed_requests"] += payload["failed_requests"]
else:
aggregated_daily_spend_update_transactions[_key] = payload
return aggregated_daily_spend_update_transactions
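A sketch of the aggregation behavior (example values only; the static method can be called without instantiating the queue): two updates that share a transaction key collapse into one record with summed counters.
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
    DailySpendUpdateQueue,
)

key = "user1_2025-04-02_apikey_gpt-4o_openai"
update_a = {
    key: {
        "spend": 10.0,
        "prompt_tokens": 100,
        "completion_tokens": 100,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }
}
update_b = {
    key: {
        "spend": 2.5,
        "prompt_tokens": 40,
        "completion_tokens": 10,
        "api_requests": 1,
        "successful_requests": 0,
        "failed_requests": 1,
    }
}

aggregated = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
    [update_a, update_b]
)
print(aggregated[key]["spend"])  # 12.5
print(aggregated[key]["failed_requests"])  # 1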

View file

@ -9,9 +9,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from litellm._logging import verbose_proxy_logger
from litellm.caching import RedisCache
from litellm.constants import MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_UPDATE_BUFFER_KEY
from litellm.constants import (
MAX_REDIS_BUFFER_DEQUEUE_COUNT,
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
REDIS_UPDATE_BUFFER_KEY,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy._types import DBSpendUpdateTransactions
from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
from litellm.secret_managers.main import str_to_bool
if TYPE_CHECKING:
@ -54,37 +62,70 @@ class RedisUpdateBuffer:
async def store_in_memory_spend_updates_in_redis(
self,
prisma_client: PrismaClient,
spend_update_queue: SpendUpdateQueue,
daily_spend_update_queue: DailySpendUpdateQueue,
):
"""
Stores the in-memory spend updates to Redis
Each transaction is a dict stored as following:
- key is the entity id
- value is the spend amount
Stores the following in-memory data structures in Redis:
- SpendUpdateQueue - Key, User, Team, TeamMember, Org, EndUser spend updates
- DailySpendUpdateQueue - aggregated view of daily spend updates
```
Redis List:
key_list_transactions:
[
"0929880201": 1.2,
"0929880202": 0.01,
"0929880203": 0.001,
]
```
For SpendUpdateQueue:
Each transaction is a dict stored as follows:
- key is the entity id
- value is the spend amount
```
Redis List:
key_list_transactions:
[
"0929880201": 1.2,
"0929880202": 0.01,
"0929880203": 0.001,
]
```
For DailySpendUpdateQueue:
Each transaction is a Dict[str, DailyUserSpendTransaction] stored as follows:
- key is the daily_transaction_key
- value is the DailyUserSpendTransaction
```
Redis List:
daily_spend_update_transactions:
[
{
"user_keyhash_1_model_1": {
"spend": 1.2,
"prompt_tokens": 1000,
"completion_tokens": 1000,
"api_requests": 1000,
"successful_requests": 1000,
},
}
]
```
"""
if self.redis_cache is None:
verbose_proxy_logger.debug(
"redis_cache is None, skipping store_in_memory_spend_updates_in_redis"
)
return
db_spend_update_transactions: DBSpendUpdateTransactions = DBSpendUpdateTransactions(
user_list_transactions=prisma_client.user_list_transactions,
end_user_list_transactions=prisma_client.end_user_list_transactions,
key_list_transactions=prisma_client.key_list_transactions,
team_list_transactions=prisma_client.team_list_transactions,
team_member_list_transactions=prisma_client.team_member_list_transactions,
org_list_transactions=prisma_client.org_list_transactions,
db_spend_update_transactions = (
await spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
verbose_proxy_logger.debug(
"ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
)
daily_spend_update_transactions = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
verbose_proxy_logger.debug(
"ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions
)
# only store in redis if there are any updates to commit
@ -100,8 +141,13 @@ class RedisUpdateBuffer:
values=list_of_transactions,
)
# clear the in-memory spend updates
RedisUpdateBuffer._clear_all_in_memory_spend_updates(prisma_client)
list_of_daily_spend_update_transactions = [
safe_dumps(daily_spend_update_transactions)
]
await self.redis_cache.async_rpush(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
values=list_of_daily_spend_update_transactions,
)
@staticmethod
def _number_of_transactions_to_store_in_redis(
@ -116,20 +162,6 @@ class RedisUpdateBuffer:
num_transactions += len(v)
return num_transactions
@staticmethod
def _clear_all_in_memory_spend_updates(
prisma_client: PrismaClient,
):
"""
Clears all in-memory spend updates
"""
prisma_client.user_list_transactions = {}
prisma_client.end_user_list_transactions = {}
prisma_client.key_list_transactions = {}
prisma_client.team_list_transactions = {}
prisma_client.team_member_list_transactions = {}
prisma_client.org_list_transactions = {}
@staticmethod
def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]:
"""
@ -197,6 +229,27 @@ class RedisUpdateBuffer:
return combined_transaction
async def get_all_daily_spend_update_transactions_from_redis_buffer(
self,
) -> Optional[Dict[str, DailyUserSpendTransaction]]:
"""
Gets all the daily spend update transactions from Redis
"""
if self.redis_cache is None:
return None
list_of_transactions = await self.redis_cache.async_lpop(
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
count=MAX_REDIS_BUFFER_DEQUEUE_COUNT,
)
if list_of_transactions is None:
return None
list_of_daily_spend_update_transactions = [
json.loads(transaction) for transaction in list_of_transactions
]
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
list_of_daily_spend_update_transactions
)
@staticmethod
def _parse_list_of_transactions(
list_of_transactions: Union[Any, List[Any]],

View file

@ -0,0 +1,113 @@
import asyncio
from typing import List
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import (
DBSpendUpdateTransactions,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
class SpendUpdateQueue(BaseUpdateQueue):
"""
In-memory buffer for spend updates that should be committed to the database
"""
def __init__(self):
super().__init__()
self.update_queue: asyncio.Queue[SpendUpdateQueueItem] = asyncio.Queue()
async def flush_and_get_aggregated_db_spend_update_transactions(
self,
) -> DBSpendUpdateTransactions:
"""Flush all updates from the queue and return all updates aggregated by entity type."""
updates = await self.flush_all_updates_from_in_memory_queue()
verbose_proxy_logger.debug("Aggregating updates by entity type: %s", updates)
return self.get_aggregated_db_spend_update_transactions(updates)
def get_aggregated_db_spend_update_transactions(
self, updates: List[SpendUpdateQueueItem]
) -> DBSpendUpdateTransactions:
"""Aggregate updates by entity type."""
# Initialize all transaction lists as empty dicts
db_spend_update_transactions = DBSpendUpdateTransactions(
user_list_transactions={},
end_user_list_transactions={},
key_list_transactions={},
team_list_transactions={},
team_member_list_transactions={},
org_list_transactions={},
)
# Map entity types to their corresponding transaction dictionary keys
entity_type_to_dict_key = {
Litellm_EntityType.USER: "user_list_transactions",
Litellm_EntityType.END_USER: "end_user_list_transactions",
Litellm_EntityType.KEY: "key_list_transactions",
Litellm_EntityType.TEAM: "team_list_transactions",
Litellm_EntityType.TEAM_MEMBER: "team_member_list_transactions",
Litellm_EntityType.ORGANIZATION: "org_list_transactions",
}
for update in updates:
entity_type = update.get("entity_type")
entity_id = update.get("entity_id") or ""
response_cost = update.get("response_cost") or 0
if entity_type is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is None",
update,
)
continue
dict_key = entity_type_to_dict_key.get(entity_type)
if dict_key is None:
verbose_proxy_logger.debug(
"Skipping update spend for update: %s, because entity_type is not in entity_type_to_dict_key",
update,
)
continue # Skip unknown entity types
# Type-safe access using if/elif statements
if dict_key == "user_list_transactions":
transactions_dict = db_spend_update_transactions[
"user_list_transactions"
]
elif dict_key == "end_user_list_transactions":
transactions_dict = db_spend_update_transactions[
"end_user_list_transactions"
]
elif dict_key == "key_list_transactions":
transactions_dict = db_spend_update_transactions[
"key_list_transactions"
]
elif dict_key == "team_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_list_transactions"
]
elif dict_key == "team_member_list_transactions":
transactions_dict = db_spend_update_transactions[
"team_member_list_transactions"
]
elif dict_key == "org_list_transactions":
transactions_dict = db_spend_update_transactions[
"org_list_transactions"
]
else:
continue
if transactions_dict is None:
transactions_dict = {}
# type: ignore below is safe: dict_key is guaranteed to be one of the DBSpendUpdateTransactions keys ("user_list_transactions", "end_user_list_transactions", "key_list_transactions", "team_list_transactions", "team_member_list_transactions", "org_list_transactions")
db_spend_update_transactions[dict_key] = transactions_dict # type: ignore
if entity_id not in transactions_dict:
transactions_dict[entity_id] = 0
transactions_dict[entity_id] += response_cost or 0
return db_spend_update_transactions
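A usage sketch (example ids and costs; assumes litellm is installed): repeated updates for the same entity are summed into the per-entity transaction dict.
import asyncio

from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue

async def main() -> None:
    queue = SpendUpdateQueue()
    await queue.add_update(
        SpendUpdateQueueItem(
            entity_type=Litellm_EntityType.USER,
            entity_id="user-1",
            response_cost=0.25,
        )
    )
    await queue.add_update(
        SpendUpdateQueueItem(
            entity_type=Litellm_EntityType.USER,
            entity_id="user-1",
            response_cost=0.5,
        )
    )
    txns = await queue.flush_and_get_aggregated_db_spend_update_transactions()
    print(txns["user_list_transactions"])  # {'user-1': 0.75}

asyncio.run(main())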

View file

@ -0,0 +1,15 @@
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
general_settings:
use_redis_transaction_buffer: true
litellm_settings:
cache: True
cache_params:
type: redis
supported_call_types: []

View file

@ -14,6 +14,7 @@ from pydantic import BaseModel
from websockets.asyncio.client import ClientConnection, connect
from litellm import DualCache
from litellm._version import version as litellm_version
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.custom_httpx.http_handler import (
@ -75,7 +76,9 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Pre-Call Hook")
await self.call_aim_guardrail(data, hook="pre_call")
await self.call_aim_guardrail(
data, hook="pre_call", key_alias=user_api_key_dict.key_alias
)
return data
async def async_moderation_hook(
@ -93,15 +96,18 @@ class AimGuardrail(CustomGuardrail):
) -> Union[Exception, str, dict, None]:
verbose_proxy_logger.debug("Inside AIM Moderation Hook")
await self.call_aim_guardrail(data, hook="moderation")
await self.call_aim_guardrail(
data, hook="moderation", key_alias=user_api_key_dict.key_alias
)
return data
async def call_aim_guardrail(self, data: dict, hook: str) -> None:
async def call_aim_guardrail(
self, data: dict, hook: str, key_alias: Optional[str]
) -> None:
user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
headers = {
"Authorization": f"Bearer {self.api_key}",
"x-aim-litellm-hook": hook,
} | ({"x-aim-user-email": user_email} if user_email else {})
headers = self._build_aim_headers(
hook=hook, key_alias=key_alias, user_email=user_email
)
response = await self.async_handler.post(
f"{self.api_base}/detect/openai",
headers=headers,
@ -120,18 +126,16 @@ class AimGuardrail(CustomGuardrail):
raise HTTPException(status_code=400, detail=res["detection_message"])
async def call_aim_guardrail_on_output(
self, request_data: dict, output: str, hook: str
self, request_data: dict, output: str, hook: str, key_alias: Optional[str]
) -> Optional[str]:
user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
)
headers = {
"Authorization": f"Bearer {self.api_key}",
"x-aim-litellm-hook": hook,
} | ({"x-aim-user-email": user_email} if user_email else {})
response = await self.async_handler.post(
f"{self.api_base}/detect/output",
headers=headers,
headers=self._build_aim_headers(
hook=hook, key_alias=key_alias, user_email=user_email
),
json={"output": output, "messages": request_data.get("messages", [])},
)
response.raise_for_status()
@ -147,6 +151,32 @@ class AimGuardrail(CustomGuardrail):
return res["detection_message"]
return None
def _build_aim_headers(
self, *, hook: str, key_alias: Optional[str], user_email: Optional[str]
):
"""
A helper function to build the HTTP headers required by Aim guardrails.
"""
return (
{
"Authorization": f"Bearer {self.api_key}",
# Used by Aim to apply only the guardrails that should be applied in a specific request phase.
"x-aim-litellm-hook": hook,
# Used by Aim to track LiteLLM version and provide backward compatibility.
"x-aim-litellm-version": litellm_version,
}
# Used by Aim to track guardrail violations by user.
| ({"x-aim-user-email": user_email} if user_email else {})
| (
{
# Used by Aim to apply only the guardrails that are associated with the key alias.
"x-aim-litellm-key-alias": key_alias,
}
if key_alias
else {}
)
)
async def async_post_call_success_hook(
self,
data: dict,
@ -160,7 +190,7 @@ class AimGuardrail(CustomGuardrail):
):
content = response.choices[0].message.content or ""
detection = await self.call_aim_guardrail_on_output(
data, content, hook="output"
data, content, hook="output", key_alias=user_api_key_dict.key_alias
)
if detection:
raise HTTPException(status_code=400, detail=detection)
@ -174,11 +204,13 @@ class AimGuardrail(CustomGuardrail):
user_email = (
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
)
headers = {
"Authorization": f"Bearer {self.api_key}",
} | ({"x-aim-user-email": user_email} if user_email else {})
async with connect(
f"{self.ws_api_base}/detect/output/ws", additional_headers=headers
f"{self.ws_api_base}/detect/output/ws",
additional_headers=self._build_aim_headers(
hook="output",
key_alias=user_api_key_dict.key_alias,
user_email=user_email,
),
) as websocket:
sender = asyncio.create_task(
self.forward_the_stream_to_aim(websocket, response)

View file

@ -13,7 +13,6 @@ from litellm.litellm_core_utils.core_helpers import (
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.auth_checks import log_db_metrics
from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
from litellm.proxy.utils import ProxyUpdateSpend
from litellm.types.utils import (
StandardLoggingPayload,
@ -37,6 +36,8 @@ class _ProxyDBLogger(CustomLogger):
if _ProxyDBLogger._should_track_errors_in_db() is False:
return
from litellm.proxy.proxy_server import proxy_logging_obj
_metadata = dict(
StandardLoggingUserAPIKeyMetadata(
user_api_key_hash=user_api_key_dict.api_key,
@ -66,7 +67,7 @@ class _ProxyDBLogger(CustomLogger):
request_data.get("proxy_server_request") or {}
)
request_data["litellm_params"]["metadata"] = existing_metadata
await DBSpendUpdateWriter.update_database(
await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key_dict.api_key,
response_cost=0.0,
user_id=user_api_key_dict.user_id,
@ -136,7 +137,7 @@ class _ProxyDBLogger(CustomLogger):
end_user_id=end_user_id,
):
## UPDATE DATABASE
await DBSpendUpdateWriter.update_database(
await proxy_logging_obj.db_spend_update_writer.update_database(
token=user_api_key,
response_cost=response_cost,
user_id=user_id,

View file

@ -747,7 +747,10 @@ def _get_enforced_params(
enforced_params: Optional[list] = None
if general_settings is not None:
enforced_params = general_settings.get("enforced_params")
if "service_account_settings" in general_settings:
if (
"service_account_settings" in general_settings
and check_if_token_is_service_account(user_api_key_dict) is True
):
service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings:
if enforced_params is None:
@ -760,6 +763,20 @@ def _get_enforced_params(
return enforced_params
def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account
Returns:
bool: True if token is a service account
"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False
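A small sketch of how this check behaves (assuming UserAPIKeyAuth accepts api_key and metadata as constructor fields, and that this snippet runs in the same module as the function above):
service_key = UserAPIKeyAuth(
    api_key="sk-hash", metadata={"service_account_id": "svc-123"}
)
normal_key = UserAPIKeyAuth(api_key="sk-hash", metadata={})

print(check_if_token_is_service_account(service_key))  # True
print(check_if_token_is_service_account(normal_key))  # False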
def _enforced_params_check(
request_body: dict,
general_settings: Optional[dict],

View file

@ -1259,19 +1259,43 @@ class SpendMetrics(BaseModel):
prompt_tokens: int = Field(default=0)
completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0)
successful_requests: int = Field(default=0)
failed_requests: int = Field(default=0)
api_requests: int = Field(default=0)
class MetricBase(BaseModel):
metrics: SpendMetrics
class MetricWithMetadata(MetricBase):
metadata: Dict[str, Any] = Field(default_factory=dict)
class KeyMetadata(BaseModel):
"""Metadata for a key"""
key_alias: Optional[str] = None
class KeyMetricWithMetadata(MetricBase):
"""Base class for metrics with additional metadata"""
metadata: KeyMetadata = Field(default_factory=KeyMetadata)
class BreakdownMetrics(BaseModel):
"""Breakdown of spend by different dimensions"""
models: Dict[str, SpendMetrics] = Field(default_factory=dict) # model -> metrics
providers: Dict[str, SpendMetrics] = Field(
models: Dict[str, MetricWithMetadata] = Field(
default_factory=dict
) # provider -> metrics
api_keys: Dict[str, SpendMetrics] = Field(
) # model -> {metrics, metadata}
providers: Dict[str, MetricWithMetadata] = Field(
default_factory=dict
) # api_key -> metrics
) # provider -> {metrics, metadata}
api_keys: Dict[str, KeyMetricWithMetadata] = Field(
default_factory=dict
) # api_key -> {metrics, metadata}
class DailySpendData(BaseModel):
@ -1284,7 +1308,10 @@ class DailySpendMetadata(BaseModel):
total_spend: float = Field(default=0.0)
total_prompt_tokens: int = Field(default=0)
total_completion_tokens: int = Field(default=0)
total_tokens: int = Field(default=0)
total_api_requests: int = Field(default=0)
total_successful_requests: int = Field(default=0)
total_failed_requests: int = Field(default=0)
page: int = Field(default=1)
total_pages: int = Field(default=1)
has_more: bool = Field(default=False)
@ -1307,6 +1334,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
completion_tokens: int = 0
spend: float = 0.0
api_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
class GroupedData(TypedDict):
@ -1322,34 +1351,57 @@ def update_metrics(
group_metrics.completion_tokens += record.completion_tokens
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
group_metrics.api_requests += record.api_requests
group_metrics.successful_requests += record.successful_requests
group_metrics.failed_requests += record.failed_requests
return group_metrics
def update_breakdown_metrics(
breakdown: BreakdownMetrics, record: LiteLLM_DailyUserSpend
breakdown: BreakdownMetrics,
record: LiteLLM_DailyUserSpend,
model_metadata: Dict[str, Dict[str, Any]],
provider_metadata: Dict[str, Dict[str, Any]],
api_key_metadata: Dict[str, Dict[str, Any]],
) -> BreakdownMetrics:
"""Updates breakdown metrics for a single record using the existing update_metrics function"""
# Update model breakdown
if record.model not in breakdown.models:
breakdown.models[record.model] = SpendMetrics()
breakdown.models[record.model] = update_metrics(
breakdown.models[record.model], record
breakdown.models[record.model] = MetricWithMetadata(
metrics=SpendMetrics(),
metadata=model_metadata.get(
record.model, {}
), # Add any model-specific metadata here
)
breakdown.models[record.model].metrics = update_metrics(
breakdown.models[record.model].metrics, record
)
# Update provider breakdown
provider = record.custom_llm_provider or "unknown"
if provider not in breakdown.providers:
breakdown.providers[provider] = SpendMetrics()
breakdown.providers[provider] = update_metrics(
breakdown.providers[provider], record
breakdown.providers[provider] = MetricWithMetadata(
metrics=SpendMetrics(),
metadata=provider_metadata.get(
provider, {}
), # Add any provider-specific metadata here
)
breakdown.providers[provider].metrics = update_metrics(
breakdown.providers[provider].metrics, record
)
# Update api key breakdown
if record.api_key not in breakdown.api_keys:
breakdown.api_keys[record.api_key] = SpendMetrics()
breakdown.api_keys[record.api_key] = update_metrics(
breakdown.api_keys[record.api_key], record
breakdown.api_keys[record.api_key] = KeyMetricWithMetadata(
metrics=SpendMetrics(),
metadata=KeyMetadata(
key_alias=api_key_metadata.get(record.api_key, {}).get(
"key_alias", None
)
), # Add any api_key-specific metadata here
)
breakdown.api_keys[record.api_key].metrics = update_metrics(
breakdown.api_keys[record.api_key].metrics, record
)
return breakdown
@ -1428,6 +1480,14 @@ async def get_user_daily_activity(
if api_key:
where_conditions["api_key"] = api_key
if (
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
):
where_conditions[
"user_id"
] = user_api_key_dict.user_id # only allow access to own data
# Get total count for pagination
total_count = await prisma_client.db.litellm_dailyuserspend.count(
where=where_conditions
@ -1443,6 +1503,28 @@ async def get_user_daily_activity(
take=page_size,
)
daily_spend_data_pydantic_list = [
LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
]
# Get all unique API keys from the spend data
api_keys = set()
for record in daily_spend_data_pydantic_list:
if record.api_key:
api_keys.add(record.api_key)
# Fetch key aliases in bulk
api_key_metadata: Dict[str, Dict[str, Any]] = {}
model_metadata: Dict[str, Dict[str, Any]] = {}
provider_metadata: Dict[str, Dict[str, Any]] = {}
if api_keys:
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
where={"token": {"in": list(api_keys)}}
)
api_key_metadata.update(
{k.token: {"key_alias": k.key_alias} for k in key_records}
)
# Process results
results = []
total_metrics = SpendMetrics()
@ -1450,7 +1532,7 @@ async def get_user_daily_activity(
# Group data by date and other dimensions
grouped_data: Dict[str, Dict[str, Any]] = {}
for record in daily_spend_data:
for record in daily_spend_data_pydantic_list:
date_str = record.date
if date_str not in grouped_data:
grouped_data[date_str] = {
@ -1464,7 +1546,11 @@ async def get_user_daily_activity(
)
# Update breakdowns
grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
grouped_data[date_str]["breakdown"], record
grouped_data[date_str]["breakdown"],
record,
model_metadata,
provider_metadata,
api_key_metadata,
)
# Update total metrics
@ -1474,7 +1560,9 @@ async def get_user_daily_activity(
total_metrics.total_tokens += (
record.prompt_tokens + record.completion_tokens
)
total_metrics.api_requests += 1
total_metrics.api_requests += record.api_requests
total_metrics.successful_requests += record.successful_requests
total_metrics.failed_requests += record.failed_requests
# Convert grouped data to response format
for date_str, data in grouped_data.items():
@ -1495,7 +1583,10 @@ async def get_user_daily_activity(
total_spend=total_metrics.spend,
total_prompt_tokens=total_metrics.prompt_tokens,
total_completion_tokens=total_metrics.completion_tokens,
total_tokens=total_metrics.total_tokens,
total_api_requests=total_metrics.api_requests,
total_successful_requests=total_metrics.successful_requests,
total_failed_requests=total_metrics.failed_requests,
page=page,
total_pages=-(-total_count // page_size), # Ceiling division
has_more=(page * page_size) < total_count,

View file

@ -394,7 +394,7 @@ class ModelManagementAuthChecks:
@staticmethod
async def can_user_make_model_call(
model_params: Union[Deployment, updateDeployment],
model_params: Deployment,
user_api_key_dict: UserAPIKeyAuth,
prisma_client: PrismaClient,
premium_user: bool,
@ -723,8 +723,38 @@ async def update_model(
},
)
_model_id = None
_model_info = getattr(model_params, "model_info", None)
if _model_info is None:
raise Exception("model_info not provided")
_model_id = _model_info.id
if _model_id is None:
raise Exception("model_info.id not provided")
_existing_litellm_params = (
await prisma_client.db.litellm_proxymodeltable.find_unique(
where={"model_id": _model_id}
)
)
if _existing_litellm_params is None:
if (
llm_router is not None
and llm_router.get_deployment(model_id=_model_id) is not None
):
raise HTTPException(
status_code=400,
detail={
"error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
},
)
else:
raise Exception("model not found")
deployment = Deployment(**_existing_litellm_params.model_dump())
await ModelManagementAuthChecks.can_user_make_model_call(
model_params=model_params,
model_params=deployment,
user_api_key_dict=user_api_key_dict,
prisma_client=prisma_client,
premium_user=premium_user,
@ -732,31 +762,6 @@ async def update_model(
# update DB
if store_model_in_db is True:
_model_id = None
_model_info = getattr(model_params, "model_info", None)
if _model_info is None:
raise Exception("model_info not provided")
_model_id = _model_info.id
if _model_id is None:
raise Exception("model_info.id not provided")
_existing_litellm_params = (
await prisma_client.db.litellm_proxymodeltable.find_unique(
where={"model_id": _model_id}
)
)
if _existing_litellm_params is None:
if (
llm_router is not None
and llm_router.get_deployment(model_id=_model_id) is not None
):
raise HTTPException(
status_code=400,
detail={
"error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
},
)
raise Exception("model not found")
_existing_litellm_params_dict = dict(
_existing_litellm_params.litellm_params
)

View file

@ -1,15 +1,6 @@
model_list:
- model_name: gpt-4o
- model_name: fake-openai-endpoint
litellm_params:
model: openai/gpt-4o
api_key: sk-xxxxxxx
mcp_servers:
{
"zapier_mcp": {
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
},
"fetch": {
"url": "http://localhost:8000/sse"
}
}
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/

View file

@ -3308,15 +3308,6 @@ async def model_list(
tags=["chat/completions"],
responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
) # azure compatible endpoint
@backoff.on_exception(
backoff.expo,
Exception, # base exception to catch for the backoff
max_tries=global_max_parallel_request_retries, # maximum number of retries
max_time=global_max_parallel_request_retry_timeout, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff
giveup=giveup,
logger=verbose_proxy_logger,
)
async def chat_completion( # noqa: PLR0915
request: Request,
fastapi_response: Response,

View file

@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
@ -352,4 +354,3 @@ enum JobStatus {
INACTIVE
}

View file

@ -10,14 +10,24 @@ import traceback
from datetime import datetime, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, overload
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Literal,
Optional,
Union,
cast,
overload,
)
from litellm.proxy._types import (
DB_CONNECTION_ERROR_TYPES,
CommonProxyErrors,
DailyUserSpendTransaction,
ProxyErrorTypes,
ProxyException,
SpendLogsMetadata,
SpendLogsPayload,
)
from litellm.types.guardrails import GuardrailEventHooks
@ -1100,14 +1110,7 @@ def jsonify_object(data: dict) -> dict:
class PrismaClient:
user_list_transactions: dict = {}
end_user_list_transactions: dict = {}
key_list_transactions: dict = {}
team_list_transactions: dict = {}
team_member_list_transactions: dict = {} # key is ["team_id" + "user_id"]
org_list_transactions: dict = {}
spend_log_transactions: List = []
daily_user_spend_transactions: Dict[str, DailyUserSpendTransaction] = {}
def __init__(
self,
@ -1145,62 +1148,40 @@ class PrismaClient:
) # Client to connect to Prisma db
verbose_proxy_logger.debug("Success - Created Prisma Client")
def add_spend_log_transaction_to_daily_user_transaction(
def get_request_status(
self, payload: Union[dict, SpendLogsPayload]
):
) -> Literal["success", "failure"]:
"""
Add a spend log transaction to the daily user transaction list
Determine if a request was successful or failed based on payload metadata.
Key = @@unique([user_id, date, api_key, model, custom_llm_provider]) )
Args:
payload (Union[dict, SpendLogsPayload]): Request payload containing metadata
If key exists, update the transaction with the new spend and usage
Returns:
Literal["success", "failure"]: Request status
"""
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
if not all(key in payload for key in expected_keys):
verbose_proxy_logger.debug(
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
)
return
if isinstance(payload["startTime"], datetime):
start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]
elif isinstance(payload["startTime"], str):
date = payload["startTime"].split("T")[0]
else:
verbose_proxy_logger.debug(
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
)
return
try:
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
if daily_transaction_key in self.daily_user_spend_transactions:
daily_transaction = self.daily_user_spend_transactions[
daily_transaction_key
]
daily_transaction["spend"] += payload["spend"]
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
daily_transaction["completion_tokens"] += payload["completion_tokens"]
daily_transaction["api_requests"] += 1
else:
daily_transaction = DailyUserSpendTransaction(
user_id=payload["user"],
date=date,
api_key=payload["api_key"],
model=payload["model"],
model_group=payload["model_group"],
custom_llm_provider=payload["custom_llm_provider"],
prompt_tokens=payload["prompt_tokens"],
completion_tokens=payload["completion_tokens"],
spend=payload["spend"],
api_requests=1,
# Get metadata and convert to dict if it's a JSON string
payload_metadata: Union[Dict, SpendLogsMetadata, str] = payload.get(
"metadata", {}
)
if isinstance(payload_metadata, str):
payload_metadata_json: Union[Dict, SpendLogsMetadata] = cast(
Dict, json.loads(payload_metadata)
)
else:
payload_metadata_json = payload_metadata
self.daily_user_spend_transactions[
daily_transaction_key
] = daily_transaction
except Exception as e:
raise e
# Check status in metadata dict
return (
"failure"
if payload_metadata_json.get("status") == "failure"
else "success"
)
except (json.JSONDecodeError, AttributeError):
# Default to success if metadata parsing fails
return "success"
def hash_token(self, token: str):
# Hash the string using SHA-256
@ -2422,7 +2403,10 @@ def _hash_token_if_needed(token: str) -> str:
class ProxyUpdateSpend:
@staticmethod
async def update_end_user_spend(
n_retry_times: int, prisma_client: PrismaClient, proxy_logging_obj: ProxyLogging
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
end_user_list_transactions: Dict[str, float],
):
for i in range(n_retry_times + 1):
start_time = time.time()
@ -2434,7 +2418,7 @@ class ProxyUpdateSpend:
for (
end_user_id,
response_cost,
) in prisma_client.end_user_list_transactions.items():
) in end_user_list_transactions.items():
if litellm.max_end_user_budget is not None:
pass
batcher.litellm_endusertable.upsert(
@ -2461,10 +2445,6 @@ class ProxyUpdateSpend:
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
finally:
prisma_client.end_user_list_transactions = (
{}
) # reset the end user list transactions - prevent bad data from causing issues
@staticmethod
async def update_spend_logs(
@ -2538,120 +2518,6 @@ class ProxyUpdateSpend:
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
@staticmethod
async def update_daily_user_spend(
n_retry_times: int,
prisma_client: PrismaClient,
proxy_logging_obj: ProxyLogging,
):
"""
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
"""
BATCH_SIZE = (
100 # Number of aggregated records to update in each database operation
)
start_time = time.time()
try:
for i in range(n_retry_times + 1):
try:
# Get transactions to process
transactions_to_process = dict(
list(prisma_client.daily_user_spend_transactions.items())[
:BATCH_SIZE
]
)
if len(transactions_to_process) == 0:
verbose_proxy_logger.debug(
"No new transactions to process for daily spend update"
)
break
# Update DailyUserSpend table in batches
async with prisma_client.db.batch_() as batcher:
for _, transaction in transactions_to_process.items():
user_id = transaction.get("user_id")
if not user_id: # Skip if no user_id
continue
batcher.litellm_dailyuserspend.upsert(
where={
"user_id_date_api_key_model_custom_llm_provider": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
}
},
data={
"create": {
"user_id": user_id,
"date": transaction["date"],
"api_key": transaction["api_key"],
"model": transaction["model"],
"model_group": transaction.get("model_group"),
"custom_llm_provider": transaction.get(
"custom_llm_provider"
),
"prompt_tokens": transaction["prompt_tokens"],
"completion_tokens": transaction[
"completion_tokens"
],
"spend": transaction["spend"],
"api_requests": transaction["api_requests"],
},
"update": {
"prompt_tokens": {
"increment": transaction["prompt_tokens"]
},
"completion_tokens": {
"increment": transaction[
"completion_tokens"
]
},
"spend": {"increment": transaction["spend"]},
"api_requests": {
"increment": transaction["api_requests"]
},
},
},
)
verbose_proxy_logger.info(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
# Remove processed transactions
for key in transactions_to_process.keys():
prisma_client.daily_user_spend_transactions.pop(key, None)
verbose_proxy_logger.debug(
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
)
break
except DB_CONNECTION_ERROR_TYPES as e:
if i >= n_retry_times:
_raise_failed_update_spend_exception(
e=e,
start_time=start_time,
proxy_logging_obj=proxy_logging_obj,
)
await asyncio.sleep(2**i) # Exponential backoff
except Exception as e:
# Remove processed transactions even if there was an error
if "transactions_to_process" in locals():
for key in transactions_to_process.keys(): # type: ignore
prisma_client.daily_user_spend_transactions.pop(key, None)
_raise_failed_update_spend_exception(
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
)
@staticmethod
def disable_spend_updates() -> bool:
"""
@ -2701,20 +2567,6 @@ async def update_spend( # noqa: PLR0915
db_writer_client=db_writer_client,
)
### UPDATE DAILY USER SPEND ###
verbose_proxy_logger.debug(
"Daily User Spend transactions: {}".format(
len(prisma_client.daily_user_spend_transactions)
)
)
if len(prisma_client.daily_user_spend_transactions) > 0:
await ProxyUpdateSpend.update_daily_user_spend(
n_retry_times=n_retry_times,
prisma_client=prisma_client,
proxy_logging_obj=proxy_logging_obj,
)
def _raise_failed_update_spend_exception(
e: Exception, start_time: float, proxy_logging_obj: ProxyLogging

View file

@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
from typing_extensions import TypeAlias
class AnthropicResponseTextBlock(TypedDict, total=False):
"""
Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
"""
citations: Optional[List[Dict[str, Any]]]
text: str
type: Literal["text"]
class AnthropicResponseToolUseBlock(TypedDict, total=False):
"""
Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
"""
id: Optional[str]
input: Optional[str]
name: Optional[str]
type: Literal["tool_use"]
class AnthropicResponseThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
"""
signature: Optional[str]
thinking: Optional[str]
type: Literal["thinking"]
class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
"""
Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
"""
data: Optional[str]
type: Literal["redacted_thinking"]
AnthropicResponseContentBlock: TypeAlias = Union[
AnthropicResponseTextBlock,
AnthropicResponseToolUseBlock,
AnthropicResponseThinkingBlock,
AnthropicResponseRedactedThinkingBlock,
]
class AnthropicUsage(TypedDict, total=False):
"""
Input and output tokens used in the request
"""
input_tokens: int
output_tokens: int
"""
Cache Tokens Used
"""
cache_creation_input_tokens: int
cache_read_input_tokens: int
class AnthropicMessagesResponse(TypedDict, total=False):
"""
Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
"""
content: Optional[List[AnthropicResponseContentBlock]]
id: str
model: Optional[str] # This represents the Model type from Anthropic
role: Optional[Literal["assistant"]]
stop_reason: Optional[
Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
]
stop_sequence: Optional[str]
type: Optional[Literal["message"]]
usage: Optional[AnthropicUsage]
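A hedged example of the response shape these TypedDicts describe (hand-constructed placeholder values, placed in the same module so the names above are in scope; all fields are total=False, so partial dicts are allowed):
example_response: AnthropicMessagesResponse = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet",
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "content": [
        {"type": "text", "text": "Hello!", "citations": None},
        {"type": "thinking", "thinking": "internal reasoning", "signature": None},
    ],
    "usage": {"input_tokens": 12, "output_tokens": 5},
}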

View file

@ -1113,3 +1113,6 @@ ResponsesAPIStreamingResponse = Annotated[
],
Discriminator("type"),
]
REASONING_EFFORT = Literal["low", "medium", "high"]

View file

@ -0,0 +1,9 @@
import json
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
class OpenRouterErrorMessage(TypedDict):
message: str
code: int
metadata: Dict

View file

@ -5901,9 +5901,10 @@ class ModelResponseIterator:
class ModelResponseListIterator:
def __init__(self, model_responses):
def __init__(self, model_responses, delay: Optional[float] = None):
self.model_responses = model_responses
self.index = 0
self.delay = delay
# Sync iterator
def __iter__(self):
@ -5914,6 +5915,8 @@ class ModelResponseListIterator:
raise StopIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
time.sleep(self.delay)
return model_response
# Async iterator
@ -5925,6 +5928,8 @@ class ModelResponseListIterator:
raise StopAsyncIteration
model_response = self.model_responses[self.index]
self.index += 1
if self.delay:
await asyncio.sleep(self.delay)
return model_response

View file

@ -4453,6 +4453,42 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,
@ -10189,6 +10225,22 @@
"litellm_provider": "voyage",
"mode": "rerank"
},
"databricks/databricks-claude-3-7-sonnet": {
"max_tokens": 200000,
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000025,
"input_dbu_cost_per_token": 0.00003571,
"output_cost_per_token": 0.00017857,
"output_db_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
@ -10217,7 +10269,7 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_tool_choice": true
},
"databricks/meta-llama-3.3-70b-instruct": {
"databricks/databricks-meta-llama-3-3-70b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,

View file

@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
mypy_path = litellm/stubs
namespace_packages = True
[mypy-google.*]
ignore_missing_imports = True

poetry.lock generated
View file

@ -1151,69 +1151,6 @@ files = [
[package.extras]
protobuf = ["grpcio-tools (>=1.70.0)"]
[[package]]
name = "grpcio"
version = "1.71.0"
description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
{file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"},
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"},
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"},
{file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"},
{file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"},
{file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"},
{file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"},
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"},
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"},
{file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"},
{file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"},
{file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"},
{file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"},
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"},
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"},
{file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"},
{file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"},
{file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"},
{file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"},
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"},
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"},
{file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"},
{file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"},
{file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"},
{file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"},
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"},
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"},
{file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"},
{file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
{file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
]
[package.extras]
protobuf = ["grpcio-tools (>=1.71.0)"]
[[package]]
name = "grpcio-status"
version = "1.70.0"
@ -1230,22 +1167,6 @@ googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.70.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "grpcio-status"
version = "1.71.0"
description = "Status proto mapping for gRPC"
optional = true
python-versions = ">=3.9"
files = [
{file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
{file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
]
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.71.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "gunicorn"
version = "23.0.0"
@ -1678,13 +1599,13 @@ referencing = ">=0.31.0"
[[package]]
name = "litellm-proxy-extras"
version = "0.1.1"
version = "0.1.2"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
optional = true
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
files = [
{file = "litellm_proxy_extras-0.1.1-py3-none-any.whl", hash = "sha256:2b3c4c5474bacbde2424c1cd13b21f85c65e9c4346f6159badd49a210eedef5c"},
{file = "litellm_proxy_extras-0.1.1.tar.gz", hash = "sha256:a1eb911ad2e3742238863d314a8bd6d02dd0cc213ba040b2c0593f132fbf3117"},
{file = "litellm_proxy_extras-0.1.2-py3-none-any.whl", hash = "sha256:2caa7bdba5a533cd1781b55e3f7c581138d2a5b68a7e6d737327669dd21d5e08"},
{file = "litellm_proxy_extras-0.1.2.tar.gz", hash = "sha256:218e97980ab5a34eed7dcd1564a910c9a790168d672cdec3c464eba9b7cb1518"},
]
[[package]]
@ -4135,4 +4056,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "16cbf20784776377805f5e33c6bc97dce76303132aa3d81c7e6fe743f0ee3fc1"
content-hash = "524b2f8276ba057f8dc8a79dd460c1a243ef4aece7c08a8bf344e029e07b8841"

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.65.1"
version = "1.65.2"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true}
boto3 = {version = "1.34.34", optional = true}
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
mcp = {version = "1.5.0", optional = true, python = ">=3.10"}
litellm-proxy-extras = {version = "0.1.1", optional = true}
litellm-proxy-extras = {version = "0.1.2", optional = true}
[tool.poetry.extras]
proxy = [
@ -117,7 +117,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.65.1"
version = "1.65.2"
version_files = [
"pyproject.toml:^version"
]

View file

@ -38,7 +38,7 @@ sentry_sdk==2.21.0 # for sentry error handling
detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
cryptography==43.0.1
tzdata==2025.1 # IANA time zone database
litellm-proxy-extras==0.1.1 # for proxy extras - e.g. prisma migrations
litellm-proxy-extras==0.1.2 # for proxy extras - e.g. prisma migrations
### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env

View file

@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
completion_tokens Int @default(0)
spend Float @default(0.0)
api_requests Int @default(0)
successful_requests Int @default(0)
failed_requests Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
@ -351,3 +353,4 @@ enum JobStatus {
ACTIVE
INACTIVE
}
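The two new columns mean each daily-spend row now tracks success and failure counts alongside token and cost totals. A minimal sketch of folding one request outcome into such a row (hypothetical helper, not the proxy's actual update path):

from dataclasses import dataclass

@dataclass
class DailyUserSpendRow:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    spend: float = 0.0
    api_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0

def record_request(row: DailyUserSpendRow, cost: float, prompt_tokens: int,
                   completion_tokens: int, succeeded: bool) -> None:
    # Every request bumps the aggregate counters; success/failure is tracked separately.
    row.spend += cost
    row.prompt_tokens += prompt_tokens
    row.completion_tokens += completion_tokens
    row.api_requests += 1
    if succeeded:
        row.successful_requests += 1
    else:
        row.failed_requests += 1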

View file

@ -1,6 +1,7 @@
import json
import os
import sys
import time
from unittest.mock import MagicMock, Mock, patch
import pytest
@ -19,6 +20,7 @@ from litellm.types.utils import (
Delta,
ModelResponseStream,
PromptTokensDetailsWrapper,
StandardLoggingPayload,
StreamingChoices,
Usage,
)
@ -36,6 +38,22 @@ def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
return streaming_handler
@pytest.fixture
def logging_obj() -> Logging:
import time
logging_obj = Logging(
model="my-random-model",
messages=[{"role": "user", "content": "Hey"}],
stream=True,
call_type="completion",
start_time=time.time(),
litellm_call_id="12345",
function_id="1245",
)
return logging_obj
bedrock_chunks = [
ModelResponseStream(
id="chatcmpl-d249def8-a78b-464c-87b5-3a6f43565292",
@ -577,3 +595,36 @@ def test_streaming_handler_with_stop_chunk(
**args, model_response=ModelResponseStream()
)
assert returned_chunk is None
@pytest.mark.asyncio
async def test_streaming_completion_start_time(logging_obj: Logging):
"""Test that the start time is set correctly"""
from litellm.integrations.custom_logger import CustomLogger
class MockCallback(CustomLogger):
pass
mock_callback = MockCallback()
litellm.success_callback = [mock_callback, "langfuse"]
completion_stream = ModelResponseListIterator(
model_responses=bedrock_chunks, delay=0.1
)
response = CustomStreamWrapper(
completion_stream=completion_stream,
model="bedrock/claude-3-5-sonnet-20240620-v1:0",
logging_obj=logging_obj,
)
async for chunk in response:
print(chunk)
await asyncio.sleep(2)
assert logging_obj.model_call_details["completion_start_time"] is not None
assert (
logging_obj.model_call_details["completion_start_time"]
< logging_obj.model_call_details["end_time"]
)

View file

@ -0,0 +1,81 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.openrouter.chat.transformation import (
OpenRouterChatCompletionStreamingHandler,
OpenRouterException,
)
class TestOpenRouterChatCompletionStreamingHandler:
def test_chunk_parser_successful(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test input chunk
chunk = {
"id": "test_id",
"created": 1234567890,
"model": "test_model",
"choices": [
{"delta": {"content": "test content", "reasoning": "test reasoning"}}
],
}
# Parse chunk
result = handler.chunk_parser(chunk)
# Verify response
assert result.id == "test_id"
assert result.object == "chat.completion.chunk"
assert result.created == 1234567890
assert result.model == "test_model"
assert len(result.choices) == 1
assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning"
def test_chunk_parser_error_response(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test error chunk
error_chunk = {
"error": {
"message": "test error",
"code": 400,
"metadata": {"key": "value"},
"user_id": "test_user",
}
}
# Verify error handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(error_chunk)
assert "Message: test error" in str(exc_info.value)
assert exc_info.value.status_code == 400
def test_chunk_parser_key_error(self):
handler = OpenRouterChatCompletionStreamingHandler(
streaming_response=None, sync_stream=True
)
# Test invalid chunk missing required fields
invalid_chunk = {"incomplete": "data"}
# Verify KeyError handling
with pytest.raises(OpenRouterException) as exc_info:
handler.chunk_parser(invalid_chunk)
assert "KeyError" in str(exc_info.value)
assert exc_info.value.status_code == 400
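A minimal sketch of the parsing contract these three cases pin down, using simplified names; this illustrates the expected behaviour rather than the actual OpenRouterChatCompletionStreamingHandler code:

class SketchOpenRouterError(Exception):
    def __init__(self, message: str, status_code: int):
        super().__init__(message)
        self.status_code = status_code

def sketch_chunk_parser(chunk: dict) -> dict:
    # Error payloads are surfaced as exceptions carrying the upstream status code.
    if "error" in chunk:
        raise SketchOpenRouterError(
            message=f"Message: {chunk['error']['message']}",
            status_code=chunk["error"].get("code", 400),
        )
    try:
        # OpenRouter's `reasoning` delta field is exposed as `reasoning_content`.
        choices = [
            {"delta": {**c["delta"], "reasoning_content": c["delta"].get("reasoning")}}
            for c in chunk["choices"]
        ]
        return {
            "id": chunk["id"],
            "object": "chat.completion.chunk",
            "created": chunk["created"],
            "model": chunk["model"],
            "choices": choices,
        }
    except KeyError as e:
        raise SketchOpenRouterError(message=f"KeyError: {e}", status_code=400)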

View file

@ -0,0 +1,97 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(0, os.path.abspath("../../../../.."))
from litellm.llms.sagemaker.common_utils import AWSEventStreamDecoder
@pytest.mark.asyncio
async def test_aiter_bytes_unicode_decode_error():
"""
Test that AWSEventStreamDecoder.aiter_bytes() does not raise an error when encountering invalid UTF-8 bytes. (UnicodeDecodeError)
Ensures stream processing continues despite the error.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
# Create an instance of AWSEventStreamDecoder
decoder = AWSEventStreamDecoder(model="test-model")
# Create a mock event that will trigger a UnicodeDecodeError
mock_event = MagicMock()
mock_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes
}
# Create a mock EventStreamBuffer that yields our mock event
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_event]
# Mock the EventStreamBuffer class
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
# Create an async generator that yields some test bytes
async def mock_iterator():
yield b""
# Process the stream
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
print("chunk=", chunk)
chunks.append(chunk)
# Verify that processing continued despite the error
# The chunks list should be empty since we only sent invalid data
assert len(chunks) == 0
@pytest.mark.asyncio
async def test_aiter_bytes_valid_chunk_followed_by_unicode_error():
"""
Test that valid chunks are processed correctly even when followed by Unicode decode errors.
This ensures errors don't corrupt or prevent processing of valid data that came before.
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
"""
decoder = AWSEventStreamDecoder(model="test-model")
# Create two mock events - first valid, then invalid
mock_valid_event = MagicMock()
mock_valid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": json.dumps({"token": {"text": "hello"}}).encode(), # Valid data first
}
mock_invalid_event = MagicMock()
mock_invalid_event.to_response_dict.return_value = {
"status_code": 200,
"headers": {},
"body": b"\xff\xfe", # Invalid UTF-8 bytes second
}
# Create a mock EventStreamBuffer that yields valid event first, then invalid
mock_buffer = MagicMock()
mock_buffer.__iter__.return_value = [mock_valid_event, mock_invalid_event]
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
async def mock_iterator():
yield b"test_bytes"
chunks = []
async for chunk in decoder.aiter_bytes(mock_iterator()):
if chunk is not None:
chunks.append(chunk)
# Verify we got our valid chunk despite the subsequent error
assert len(chunks) == 1
assert chunks[0]["text"] == "hello" # Verify the content of the valid chunk

View file

@ -1,137 +0,0 @@
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
def test_anthropic_prompt_caching_headers_for_vertex():
"""
Test that the prompt caching beta header is correctly set for Vertex AI requests
with Anthropic models when cache control is present in the messages.
"""
# Create an instance of AnthropicConfig
config = AnthropicConfig()
# Test case 1: Vertex request with prompt caching
# Create a message with cache control
messages = [
{
"role": "system",
"content": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in messages"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
# Test case 2: Vertex request without prompt caching
messages_without_cache = [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages_without_cache)
assert is_cache_control_set is False, "Cache control should not be detected in messages"
# Generate headers for a Vertex AI request without prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is not set
assert "anthropic-beta" not in headers, "anthropic-beta header should not be present"
def test_anthropic_prompt_caching_with_content_blocks():
"""
Test that prompt caching is correctly detected when cache control is in content blocks.
"""
config = AnthropicConfig()
# Message with cache control in content blocks
messages = [
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are a helpful assistant.",
"cache_control": {"type": "ephemeral"}
}
]
},
{
"role": "user",
"content": "Tell me about the solar system."
}
]
# Check if cache control is detected
is_cache_control_set = config.is_cache_control_set(messages=messages)
assert is_cache_control_set is True, "Cache control should be detected in content blocks"
# Generate headers for a Vertex AI request with prompt caching
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=is_cache_control_set,
is_vertex_request=True
)
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
def test_anthropic_vertex_other_beta_headers():
"""
Test that other beta headers are not included for Vertex AI requests.
"""
config = AnthropicConfig()
# Generate headers with multiple beta features
headers = config.get_anthropic_headers(
api_key="test-api-key",
prompt_caching_set=True,
computer_tool_used=True, # This should be excluded for Vertex
pdf_used=True, # This should be excluded for Vertex
is_vertex_request=True
)
# Verify that only prompt-caching is included in the beta header
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
assert headers["anthropic-beta"] == "prompt-caching-2024-07-31", "Only prompt-caching should be in the beta header"
assert "computer-use-2024-10-22" not in headers["anthropic-beta"], "computer-use beta should not be included"
assert "pdfs-2024-09-25" not in headers["anthropic-beta"], "pdfs beta should not be included"

View file

@ -39,7 +39,7 @@ async def test_request_body_caching():
result1 = await _read_request_body(mock_request)
assert result1 == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
# Verify the body was read once
mock_request.body.assert_called_once()
@ -49,7 +49,7 @@ async def test_request_body_caching():
# Second call should use the cached body
result2 = await _read_request_body(mock_request)
assert result2 == test_data
assert result2 == {"key": "value"}
# Verify the body was not read again
mock_request.body.assert_not_called()
@ -75,7 +75,10 @@ async def test_form_data_parsing():
# Verify the form data was correctly parsed
assert result == test_data
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == test_data
assert mock_request.scope["parsed_body"] == (
("name", "message"),
{"name": "test_user", "message": "hello world"},
)
# Verify form() was called
mock_request.form.assert_called_once()
@ -101,7 +104,46 @@ async def test_empty_request_body():
# Verify an empty dict is returned
assert result == {}
assert "parsed_body" in mock_request.scope
assert mock_request.scope["parsed_body"] == {}
assert mock_request.scope["parsed_body"] == ((), {})
# Verify the body was read
mock_request.body.assert_called_once()
@pytest.mark.asyncio
async def test_circular_reference_handling():
"""
Test that cached request body isn't modified when the returned result is modified.
Demonstrates the mutable dictionary reference issue.
"""
# Create a mock request with initial data
mock_request = MagicMock()
initial_body = {
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}],
}
mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
mock_request.headers = {"content-type": "application/json"}
mock_request.scope = {}
# First parse
result = await _read_request_body(mock_request)
# Verify initial parse
assert result["model"] == "gpt-4"
assert result["messages"] == [{"role": "user", "content": "Hello"}]
# Modify the result by adding proxy_server_request
result["proxy_server_request"] = {
"url": "http://0.0.0.0:4000/v1/chat/completions",
"method": "POST",
"headers": {"content-type": "application/json"},
"body": result, # Creates circular reference
}
# Second parse using the same request - should return the original body, not the mutated copy
result2 = await _read_request_body(mock_request)
assert (
"proxy_server_request" not in result2
) # Passes only if the cached body was not polluted by the mutation above
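The new test pins down a subtle contract: callers may mutate the dict they receive without polluting the per-request cache. One way to honour that, sketched here under the assumption of a copy-on-return cache (not necessarily how _read_request_body is implemented):

import copy

_CACHE_KEY = "parsed_body"

def get_parsed_body(scope: dict, raw_body: dict) -> dict:
    # Cache a snapshot once, then hand out a fresh copy on every read, so a caller
    # attaching e.g. proxy_server_request cannot change what later readers see.
    if _CACHE_KEY not in scope:
        scope[_CACHE_KEY] = copy.deepcopy(raw_body)
    return copy.deepcopy(scope[_CACHE_KEY])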

View file

@ -0,0 +1,264 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import (
DailyUserSpendTransaction,
Litellm_EntityType,
SpendUpdateQueueItem,
)
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def daily_spend_update_queue():
return DailySpendUpdateQueue()
@pytest.mark.asyncio
async def test_empty_queue_flush(daily_spend_update_queue):
"""Test flushing an empty queue returns an empty list"""
result = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert result == []
@pytest.mark.asyncio
async def test_add_single_update(daily_spend_update_queue):
"""Test adding a single update to the queue"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add update to queue
await daily_spend_update_queue.add_update({test_key: test_transaction})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 1
assert test_key in updates[0]
assert updates[0][test_key] == test_transaction
@pytest.mark.asyncio
async def test_add_multiple_updates(daily_spend_update_queue):
"""Test adding multiple updates to the queue"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key1: test_transaction1})
await daily_spend_update_queue.add_update({test_key2: test_transaction2})
# Flush and check
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
assert len(updates) == 2
# Find each transaction in the list of updates
found_transaction1 = False
found_transaction2 = False
for update in updates:
if test_key1 in update:
assert update[test_key1] == test_transaction1
found_transaction1 = True
if test_key2 in update:
assert update[test_key2] == test_transaction2
found_transaction2 = True
assert found_transaction1
assert found_transaction2
@pytest.mark.asyncio
async def test_aggregated_daily_spend_update_empty(daily_spend_update_queue):
"""Test aggregating updates from an empty queue"""
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert result == {}
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_single_key():
"""Test static method for aggregating a single key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key: test_transaction}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == test_transaction
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_multiple_keys():
"""Test static method for aggregating multiple different keys"""
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
updates = [{test_key1: test_transaction1}, {test_key2: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 2
assert test_key1 in result
assert test_key2 in result
assert result[test_key1] == test_transaction1
assert result[test_key2] == test_transaction2
@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_same_key():
"""Test static method for aggregating updates with the same key"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]
# Test aggregation
result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
updates
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction
@pytest.mark.asyncio
async def test_flush_and_get_aggregated_daily_spend_update_transactions(
daily_spend_update_queue,
):
"""Test the full workflow of adding, flushing, and aggregating updates"""
test_key = "user1_2023-01-01_key123_gpt-4_openai"
test_transaction1 = {
"spend": 10.0,
"prompt_tokens": 100,
"completion_tokens": 50,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
test_transaction2 = {
"spend": 5.0,
"prompt_tokens": 200,
"completion_tokens": 30,
"api_requests": 1,
"successful_requests": 1,
"failed_requests": 0,
}
expected_transaction = {
"spend": 15.0, # 10 + 5
"prompt_tokens": 300, # 100 + 200
"completion_tokens": 80, # 50 + 30
"api_requests": 2, # 1 + 1
"successful_requests": 2, # 1 + 1
"failed_requests": 0, # 0 + 0
}
# Add updates to queue
await daily_spend_update_queue.add_update({test_key: test_transaction1})
await daily_spend_update_queue.add_update({test_key: test_transaction2})
# Test full workflow
result = (
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
)
assert len(result) == 1
assert test_key in result
assert result[test_key] == expected_transaction
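The aggregation the last two tests expect is a plain field-wise sum per key. A compact sketch of that merge (illustrative only; the real logic is the static method on DailySpendUpdateQueue):

from typing import Dict, List

def aggregate_daily_updates(updates: List[Dict[str, dict]]) -> Dict[str, dict]:
    # Transactions sharing a key (user/date/key/model/provider) are summed field by field.
    merged: Dict[str, dict] = {}
    for update in updates:
        for key, txn in update.items():
            if key not in merged:
                merged[key] = dict(txn)
            else:
                for field, value in txn.items():
                    merged[key][field] = merged[key].get(field, 0) + value
    return merged

Run over the two same-key transactions above, it yields spend 15.0, 300 prompt tokens, 80 completion tokens, and 2 requests, all successful, matching expected_transaction.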

View file

@ -12,7 +12,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
# Mock Prisma client class

View file

@ -0,0 +1,152 @@
import asyncio
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
@pytest.fixture
def spend_queue():
return SpendUpdateQueue()
@pytest.mark.asyncio
async def test_add_update(spend_queue):
# Test adding a single update
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
# Verify update was added by checking queue size
assert spend_queue.update_queue.qsize() == 1
@pytest.mark.asyncio
async def test_missing_response_cost(spend_queue):
# Test with missing response_cost - should default to 0
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should have created entry with 0 cost
assert aggregated["user_list_transactions"]["user123"] == 0
@pytest.mark.asyncio
async def test_missing_entity_id(spend_queue):
# Test with missing entity_id - should default to empty string
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"response_cost": 1.0,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should use empty string as key
assert aggregated["user_list_transactions"][""] == 1.0
@pytest.mark.asyncio
async def test_none_values(spend_queue):
# Test with None values
update: SpendUpdateQueueItem = {
"entity_type": Litellm_EntityType.USER,
"entity_id": None, # type: ignore
"response_cost": None,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should handle None values gracefully
assert aggregated["user_list_transactions"][""] == 0
@pytest.mark.asyncio
async def test_multiple_updates_with_missing_fields(spend_queue):
# Test multiple updates with various missing fields
updates: list[SpendUpdateQueueItem] = [
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123",
"response_cost": 0.5,
},
{
"entity_type": Litellm_EntityType.USER,
"entity_id": "user123", # missing response_cost
},
{
"entity_type": Litellm_EntityType.USER, # missing entity_id
"response_cost": 1.5,
},
]
for update in updates:
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Verify aggregation
assert (
aggregated["user_list_transactions"]["user123"] == 0.5
) # only the first update with valid cost
assert (
aggregated["user_list_transactions"][""] == 1.5
) # update with missing entity_id
@pytest.mark.asyncio
async def test_unknown_entity_type(spend_queue):
# Test with unknown entity type
update: SpendUpdateQueueItem = {
"entity_type": "UNKNOWN_TYPE", # type: ignore
"entity_id": "123",
"response_cost": 0.5,
}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore unknown entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())
@pytest.mark.asyncio
async def test_missing_entity_type(spend_queue):
# Test with missing entity type
update: SpendUpdateQueueItem = {"entity_id": "123", "response_cost": 0.5}
await spend_queue.add_update(update)
aggregated = (
await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
)
# Should ignore updates without entity type
assert all(len(transactions) == 0 for transactions in aggregated.values())
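Taken together, these cases define the defaulting rules: a missing response_cost counts as 0, a missing entity_id falls back to the empty string, and updates with an unknown or missing entity type are dropped. A sketch of that aggregation step under those assumptions (string entity types, users only; not the queue's actual code):

from typing import Dict, List

def aggregate_user_spend(updates: List[dict]) -> Dict[str, float]:
    # Only user-type updates are folded in; unknown or missing entity types are ignored.
    totals: Dict[str, float] = {}
    for update in updates:
        if update.get("entity_type") != "user":
            continue
        entity_id = update.get("entity_id") or ""
        cost = update.get("response_cost") or 0.0
        totals[entity_id] = totals.get(entity_id, 0.0) + cost
    return totals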

View file

@ -55,3 +55,30 @@ async def test_ui_view_users_with_null_email(mocker, caplog):
assert response == [
LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None)
]
def test_user_daily_activity_types():
"""
Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
"""
from litellm.proxy.management_endpoints.internal_user_endpoints import (
DailySpendMetadata,
SpendMetrics,
)
# Create a SpendMetrics instance
spend_metrics = SpendMetrics()
# Create a DailySpendMetadata instance
daily_spend_metadata = DailySpendMetadata()
# Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
for field in spend_metrics.__dict__:
if field.startswith("total_"):
assert hasattr(
daily_spend_metadata, field
), f"Field {field} is not reported in DailySpendMetadata"
else:
assert not hasattr(
daily_spend_metadata, field
), f"Field {field} is reported in DailySpendMetadata"

View file

@ -0,0 +1,105 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import (
_get_enforced_params,
check_if_token_is_service_account,
)
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
def test_check_if_token_is_service_account():
"""
Test that only keys with `service_account_id` in metadata are considered service accounts
"""
# Test case 1: Service account token
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
assert check_if_token_is_service_account(service_account_token) == True
# Test case 2: Regular user token
regular_token = UserAPIKeyAuth(api_key="test-key", metadata={})
assert check_if_token_is_service_account(regular_token) == False
# Test case 3: Token with other metadata
other_metadata_token = UserAPIKeyAuth(
api_key="test-key", metadata={"user_id": "test-user"}
)
assert check_if_token_is_service_account(other_metadata_token) == False
def test_get_enforced_params_for_service_account_settings():
"""
Test that service account enforced params are only added to service account keys
"""
service_account_token = UserAPIKeyAuth(
api_key="test-key", metadata={"service_account_id": "test-service-account"}
)
general_settings_with_service_account_settings = {
"service_account_settings": {"enforced_params": ["metadata.service"]},
}
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=service_account_token,
)
assert result == ["metadata.service"]
regular_token = UserAPIKeyAuth(
api_key="test-key", metadata={"enforced_params": ["user"]}
)
result = _get_enforced_params(
general_settings=general_settings_with_service_account_settings,
user_api_key_dict=regular_token,
)
assert result == ["user"]
@pytest.mark.parametrize(
"general_settings, user_api_key_dict, expected_enforced_params",
[
(
{"enforced_params": ["param1", "param2"]},
UserAPIKeyAuth(
api_key="test_api_key", user_id="test_user_id", org_id="test_org_id"
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
user_id="test_user_id",
org_id="test_org_id",
metadata={"service_account_id": "test_service_account_id"},
),
["param1", "param2"],
),
(
{"service_account_settings": {"enforced_params": ["param1", "param2"]}},
UserAPIKeyAuth(
api_key="test_api_key",
metadata={
"enforced_params": ["param3", "param4"],
"service_account_id": "test_service_account_id",
},
),
["param1", "param2", "param3", "param4"],
),
],
)
def test_get_enforced_params(
general_settings, user_api_key_dict, expected_enforced_params
):
from litellm.proxy.litellm_pre_call_utils import _get_enforced_params
enforced_params = _get_enforced_params(general_settings, user_api_key_dict)
assert enforced_params == expected_enforced_params
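The parametrised cases encode a precedence: globally enforced params apply to every key, service-account enforced params apply only to keys whose metadata carries service_account_id, and key-level metadata params are appended on top. A sketch of that merge under those assumptions (not the proxy's actual helper):

from typing import List, Optional

def sketch_enforced_params(general_settings: dict, key_metadata: Optional[dict]) -> List[str]:
    metadata = key_metadata or {}
    enforced: List[str] = []
    # Globally enforced params always apply.
    enforced += general_settings.get("enforced_params", [])
    # Service-account enforced params apply only to service-account keys.
    if "service_account_id" in metadata:
        enforced += general_settings.get("service_account_settings", {}).get("enforced_params", [])
    # Key-level enforced params are appended last.
    enforced += metadata.get("enforced_params", [])
    return enforced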

View file

@ -198,6 +198,42 @@ class BaseLLMChatTest(ABC):
messages=image_messages,
)
assert response is not None
def test_file_data_unit_test(self, pdf_messages):
from litellm.utils import supports_pdf_input, return_raw_request
from litellm.types.utils import CallTypes
from litellm.litellm_core_utils.prompt_templates.factory import convert_to_anthropic_image_obj
media_chunk = convert_to_anthropic_image_obj(
openai_image_url=pdf_messages,
format=None,
)
file_content = [
{"type": "text", "text": "What's this file about?"},
{
"type": "file",
"file": {
"file_data": pdf_messages,
}
},
]
image_messages = [{"role": "user", "content": file_content}]
base_completion_call_args = self.get_base_completion_call_args()
if not supports_pdf_input(base_completion_call_args["model"], None):
pytest.skip("Model does not support image input")
raw_request = return_raw_request(
endpoint=CallTypes.completion,
kwargs={**base_completion_call_args, "messages": image_messages},
)
print("RAW REQUEST", raw_request)
assert media_chunk["data"] in json.dumps(raw_request)
def test_message_with_name(self):
try:

View file

@ -268,7 +268,7 @@ async def test_vision_with_custom_model():
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABDElEQVRYhe2SzWqEMBRGPyQTfQxJsc5jBKGzFmlslyFIZxsCQ7sUaWd87EanpdpIrbtC71mE/NyTm9wEIAiCIAiC+N/otQBxU2Sf/aeh4enqptHXri+/yxIq63jlKCw6cXssnr3ObdzdGYFYCJ2IzHKXLygHXCB98Gm4DE+ZZemu5EisQSyZTmyg+AuzQbkezCuIy7EI0k9Ig3FtruwydY+qniqtV5yQyo8qpUIl2fc90KVzJWohWf2qu75vlw52rdfjVDHg8vLWwixW7PChqLkSyUadwfSS0uQZhEvRuIkS53uJvrK8cGWYaPwpGt8efvw+vlo8TPMzcmP8w7lrNypc1RsNgiAIgiD+Iu/RyDYhCaWrgQAAAABJRU5ErkJggg=="
},
},
],

View file

@ -1379,3 +1379,20 @@ def test_azure_modalities_param():
)
assert optional_params["modalities"] == ["text", "audio"]
assert optional_params["audio"] == {"type": "audio_input", "input": "test.wav"}
@pytest.mark.parametrize(
"model, provider",
[
("claude-3-7-sonnet-20240620-v1:0", "anthropic"),
("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
("invoke/anthropic.claude-3-7-sonnet-20240620-v1:0", "bedrock"),
("claude-3-7-sonnet@20250219", "vertex_ai"),
],
)
def test_anthropic_unified_reasoning_content(model, provider):
optional_params = get_optional_params(
model=model,
custom_llm_provider=provider,
reasoning_effort="high",
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}

Some files were not shown because too many files have changed in this diff.