Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)

Merge branch 'BerriAI:main' into main
commit 75f41a2d64

134 changed files with 3935 additions and 1451 deletions
|
@ -1450,7 +1450,7 @@ jobs:
|
|||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
|
||||
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/spend_tracking_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
|
||||
no_output_timeout: 120m
|
||||
|
||||
# Store test results
|
||||
|
@ -1743,6 +1743,96 @@ jobs:
|
|||
# Store test results
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
proxy_spend_accuracy_tests:
|
||||
machine:
|
||||
image: ubuntu-2204:2023.10.1
|
||||
resource_class: xlarge
|
||||
working_directory: ~/project
|
||||
steps:
|
||||
- checkout
|
||||
- setup_google_dns
|
||||
- run:
|
||||
name: Install Docker CLI (In case it's not already installed)
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
|
||||
- run:
|
||||
name: Install Python 3.9
|
||||
command: |
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
conda init bash
|
||||
source ~/.bashrc
|
||||
conda create -n myenv python=3.9 -y
|
||||
conda activate myenv
|
||||
python --version
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install aiohttp
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r requirements.txt
|
||||
- run:
|
||||
name: Build Docker image
|
||||
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
|
||||
- run:
|
||||
name: Run Docker container
|
||||
# intentionally give bad redis credentials here
|
||||
# the OTEL test - should get this as a trace
|
||||
command: |
|
||||
docker run -d \
|
||||
-p 4000:4000 \
|
||||
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
||||
-e REDIS_HOST=$REDIS_HOST \
|
||||
-e REDIS_PASSWORD=$REDIS_PASSWORD \
|
||||
-e REDIS_PORT=$REDIS_PORT \
|
||||
-e LITELLM_MASTER_KEY="sk-1234" \
|
||||
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||
-e LITELLM_LICENSE=$LITELLM_LICENSE \
|
||||
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
|
||||
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
|
||||
-e USE_DDTRACE=True \
|
||||
-e DD_API_KEY=$DD_API_KEY \
|
||||
-e DD_SITE=$DD_SITE \
|
||||
-e AWS_REGION_NAME=$AWS_REGION_NAME \
|
||||
--name my-app \
|
||||
-v $(pwd)/litellm/proxy/example_config_yaml/spend_tracking_config.yaml:/app/config.yaml \
|
||||
my-app:latest \
|
||||
--config /app/config.yaml \
|
||||
--port 4000 \
|
||||
--detailed_debug \
|
||||
- run:
|
||||
name: Install curl and dockerize
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y curl
|
||||
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
- run:
|
||||
name: Start outputting logs
|
||||
command: docker logs -f my-app
|
||||
background: true
|
||||
- run:
|
||||
name: Wait for app to be ready
|
||||
command: dockerize -wait http://localhost:4000 -timeout 5m
|
||||
- run:
|
||||
name: Run tests
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/spend_tracking_tests -x --junitxml=test-results/junit.xml --durations=5
|
||||
no_output_timeout:
|
||||
120m
|
||||
# Clean up first container
|
||||
- run:
|
||||
name: Stop and remove first container
|
||||
command: |
|
||||
docker stop my-app
|
||||
docker rm my-app
|
||||
|
||||
proxy_multi_instance_tests:
|
||||
machine:
|
||||
|
@ -2553,6 +2643,12 @@ workflows:
|
|||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- proxy_spend_accuracy_tests:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- proxy_multi_instance_tests:
|
||||
filters:
|
||||
branches:
|
||||
|
@ -2714,6 +2810,7 @@ workflows:
|
|||
- installing_litellm_on_python
|
||||
- installing_litellm_on_python_3_13
|
||||
- proxy_logging_guardrails_model_info_tests
|
||||
- proxy_spend_accuracy_tests
|
||||
- proxy_multi_instance_tests
|
||||
- proxy_store_model_in_db_tests
|
||||
- proxy_build_from_pip_tests
|
||||
|
|
4  .github/workflows/test-linting.yml  vendored
|
@ -24,10 +24,10 @@ jobs:
|
|||
run: |
|
||||
poetry install --with dev
|
||||
|
||||
- name: Run Black formatting check
|
||||
- name: Run Black formatting
|
||||
run: |
|
||||
cd litellm
|
||||
poetry run black . --check
|
||||
poetry run black .
|
||||
cd ..
|
||||
|
||||
- name: Run Ruff linting
|
||||
|
|
|
@ -1,2 +1,11 @@
|
|||
python3 -m build
|
||||
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
|
||||
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
|
||||
|
||||
|
||||
Note: You might need to make a MANIFEST.ini file on root for build process incase it fails
|
||||
|
||||
Place this in MANIFEST.ini
|
||||
recursive-exclude venv *
|
||||
recursive-exclude myenv *
|
||||
recursive-exclude py313_env *
|
||||
recursive-exclude **/.venv *
|
||||
|
|
|
@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
|
|||
|
||||
# /v1/messages [BETA]
|
||||
|
||||
LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
|
||||
Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
|
||||
|
||||
This currently just supports the Anthropic API.
|
||||
|
||||
## Overview
|
||||
|
||||
| Feature | Supported | Notes |
|
||||
|-------|-------|-------|
|
||||
|
@ -21,9 +22,61 @@ Planned improvement:
|
|||
- Bedrock Anthropic support
|
||||
|
||||
## Usage
|
||||
---
|
||||
|
||||
### LiteLLM Python SDK
|
||||
|
||||
#### Non-streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
|
||||
Example response:
|
||||
```json
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "Hi! this is a very short joke",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"role": "assistant",
|
||||
"stop_reason": "end_turn",
|
||||
"stop_sequence": null,
|
||||
"type": "message",
|
||||
"usage": {
|
||||
"input_tokens": 2095,
|
||||
"output_tokens": 503,
|
||||
"cache_creation_input_tokens": 2095,
|
||||
"cache_read_input_tokens": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
```
|
||||
|
||||
### LiteLLM Proxy Server
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="PROXY" value="proxy">
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
|
@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
|
|||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
<Tabs>
|
||||
<TabItem label="Anthropic Python SDK" value="python">
|
||||
|
||||
```python showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
import anthropic
|
||||
|
||||
# point anthropic sdk to litellm proxy
|
||||
client = anthropic.Anthropic(
|
||||
base_url="http://0.0.0.0:4000",
|
||||
api_key="sk-1234",
|
||||
)
|
||||
|
||||
response = client.messages.create(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem label="curl" value="curl">
|
||||
|
||||
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||
-H 'content-type: application/json' \
|
||||
-H 'x-api-key: $LITELLM_API_KEY' \
|
||||
|
@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
|||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "List 5 important events in the XIX century"
|
||||
}
|
||||
]
|
||||
"content": "Hello, can you tell me a short joke?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 4096
|
||||
"max_tokens": 100
|
||||
}'
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
</Tabs>
|
||||
|
||||
```python
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
# set env
|
||||
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
|
||||
## Request Format
|
||||
---
|
||||
|
||||
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
|
||||
Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**
|
||||
|
||||
# Call the handler
|
||||
async def call():
|
||||
response = await anthropic_messages(
|
||||
messages=messages,
|
||||
api_key=api_key,
|
||||
model="claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
#### Example request body
|
||||
|
||||
asyncio.run(call())
|
||||
```json
|
||||
{
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"max_tokens": 1024,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
#### Required Fields
|
||||
- **model** (string):
|
||||
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
|
||||
- **max_tokens** (integer):
|
||||
The maximum number of tokens to generate before stopping.
|
||||
_Note: The model may stop before reaching this limit; value must be greater than 1._
|
||||
- **messages** (array of objects):
|
||||
An ordered list of conversational turns.
|
||||
Each message object must include:
|
||||
- **role** (enum: `"user"` or `"assistant"`):
|
||||
Specifies the speaker of the message.
|
||||
- **content** (string or array of content blocks):
|
||||
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
|
||||
_Example equivalence:_
|
||||
```json
|
||||
{"role": "user", "content": "Hello, Claude"}
|
||||
```
|
||||
is equivalent to:
|
||||
```json
|
||||
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
|
||||
```
|
||||
|
||||
#### Optional Fields
|
||||
- **metadata** (object):
|
||||
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
|
||||
- **stop_sequences** (array of strings):
|
||||
Custom sequences that, when encountered in the generated text, cause the model to stop.
|
||||
- **stream** (boolean):
|
||||
Indicates whether to stream the response using server-sent events.
|
||||
- **system** (string or array):
|
||||
A system prompt providing context or specific instructions to the model.
|
||||
- **temperature** (number):
|
||||
Controls randomness in the model’s responses. Valid range: `0 < temperature < 1`.
|
||||
- **thinking** (object):
|
||||
Configuration for enabling extended thinking. If enabled, it includes:
|
||||
- **budget_tokens** (integer):
|
||||
Minimum of 1024 tokens (and less than `max_tokens`).
|
||||
- **type** (enum):
|
||||
E.g., `"enabled"`.
|
||||
- **tool_choice** (object):
|
||||
Instructs how the model should utilize any provided tools.
|
||||
- **tools** (array of objects):
|
||||
Definitions for tools available to the model. Each tool includes:
|
||||
- **name** (string):
|
||||
The tool’s name.
|
||||
- **description** (string):
|
||||
A detailed description of the tool.
|
||||
- **input_schema** (object):
|
||||
A JSON schema describing the expected input format for the tool.
|
||||
- **top_k** (integer):
|
||||
Limits sampling to the top K options.
|
||||
- **top_p** (number):
|
||||
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
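For illustration, a request that combines several of these optional fields might look like the following sketch (values are hypothetical; `budget_tokens` must be at least 1024 and less than `max_tokens`):

```json
{
  "model": "claude-3-7-sonnet-20250219",
  "max_tokens": 2048,
  "system": "You are a concise assistant.",
  "stop_sequences": ["END_OF_ANSWER"],
  "stream": false,
  "metadata": {"user_id": "user_1234"},
  "thinking": {"type": "enabled", "budget_tokens": 1024},
  "messages": [
    {"role": "user", "content": "Summarize the plot of Hamlet in two sentences."}
  ]
}
```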
|
||||
|
||||
|
||||
## Response Format
|
||||
---
|
||||
|
||||
Responses will be in the Anthropic messages API format.
|
||||
|
||||
#### Example Response
|
||||
|
||||
```json
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "Hi! My name is Claude.",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"role": "assistant",
|
||||
"stop_reason": "end_turn",
|
||||
"stop_sequence": null,
|
||||
"type": "message",
|
||||
"usage": {
|
||||
"input_tokens": 2095,
|
||||
"output_tokens": 503,
|
||||
"cache_creation_input_tokens": 2095,
|
||||
"cache_read_input_tokens": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response fields
|
||||
|
||||
- **content** (array of objects):
|
||||
Contains the generated content blocks from the model. Each block includes:
|
||||
- **type** (string):
|
||||
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
|
||||
- **text** (string):
|
||||
The generated text from the model.
|
||||
_Note: Maximum length is 5,000,000 characters._
|
||||
- **citations** (array of objects or `null`):
|
||||
Optional field providing citation details. Each citation includes:
|
||||
- **cited_text** (string):
|
||||
The excerpt being cited.
|
||||
- **document_index** (integer):
|
||||
An index referencing the cited document.
|
||||
- **document_title** (string or `null`):
|
||||
The title of the cited document.
|
||||
- **start_char_index** (integer):
|
||||
The starting character index for the citation.
|
||||
- **end_char_index** (integer):
|
||||
The ending character index for the citation.
|
||||
- **type** (string):
|
||||
Typically `"char_location"`.
|
||||
|
||||
- **id** (string):
|
||||
A unique identifier for the response message.
|
||||
_Note: The format and length of IDs may change over time._
|
||||
|
||||
- **model** (string):
|
||||
Specifies the model that generated the response.
|
||||
|
||||
- **role** (string):
|
||||
Indicates the role of the generated message. For responses, this is always `"assistant"`.
|
||||
|
||||
- **stop_reason** (string):
|
||||
Explains why the model stopped generating text. Possible values include:
|
||||
- `"end_turn"`: The model reached a natural stopping point.
|
||||
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
|
||||
- `"stop_sequence"`: A custom stop sequence was encountered.
|
||||
- `"tool_use"`: The model invoked one or more tools.
|
||||
|
||||
- **stop_sequence** (string or `null`):
|
||||
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
|
||||
|
||||
- **type** (string):
|
||||
Denotes the type of response object, which is always `"message"`.
|
||||
|
||||
- **usage** (object):
|
||||
Provides details on token usage for billing and rate limiting. This includes:
|
||||
- **input_tokens** (integer):
|
||||
Total number of input tokens processed.
|
||||
- **output_tokens** (integer):
|
||||
Total number of output tokens generated.
|
||||
- **cache_creation_input_tokens** (integer or `null`):
|
||||
Number of tokens used to create a cache entry.
|
||||
- **cache_read_input_tokens** (integer or `null`):
|
||||
Number of tokens read from the cache.
|
||||
|
|
|
@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
|
|||
|
||||
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
|
||||
|
||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
|
||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
|
||||
|
||||
:::info
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import Image from '@theme/IdealImage';
|
||||
|
||||
# Enterprise
|
||||
For companies that need SSO, user management and professional support for LiteLLM Proxy
|
||||
|
||||
|
@ -7,6 +9,8 @@ Get free 7-day trial key [here](https://www.litellm.ai/#trial)
|
|||
|
||||
Includes all enterprise features.
|
||||
|
||||
<Image img={require('../img/enterprise_vs_oss.png')} />
|
||||
|
||||
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
|
||||
|
||||
|
||||
|
|
|
@ -1035,8 +1035,10 @@ response = completion(
|
|||
"content": [
|
||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||
}
|
||||
},
|
||||
],
|
||||
}
|
||||
|
@ -1081,8 +1083,10 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
|||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1168,14 +1168,22 @@ os.environ["AWS_REGION_NAME"] = ""
|
|||
# pdf url
|
||||
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
||||
|
||||
# Download the file
|
||||
response = requests.get(url)
|
||||
file_data = response.content
|
||||
|
||||
encoded_file = base64.b64encode(file_data).decode("utf-8")
|
||||
|
||||
# model
|
||||
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||
|
||||
image_content = [
|
||||
{"type": "text", "text": "What's this file about?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": image_url, # OR {"url": image_url}
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
|
@ -1221,8 +1229,10 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|||
"messages": [
|
||||
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||
}
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
|
|
@ -365,7 +365,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|||
</Tabs>
|
||||
|
||||
## Specifying Safety Settings
|
||||
In certain use-cases you may need to make calls to the models and pass [safety settigns](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
||||
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
||||
|
||||
```python
|
||||
response = completion(
|
||||
|
|
|
@ -82,7 +82,7 @@ from litellm import completion
|
|||
os.environ["XAI_API_KEY"] = "your-api-key"
|
||||
|
||||
response = completion(
|
||||
model="xai/grok-2-latest",
|
||||
model="xai/grok-2-vision-latest",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
|
|
|
@ -23,6 +23,12 @@ In the newly created guard's page, you can find a reference to the prompt policy
|
|||
|
||||
You can decide which detections will be enabled, and set the threshold for each detection.
|
||||
|
||||
:::info
|
||||
When using LiteLLM with virtual keys, key-specific policies can be set directly in Aim's guards page by specifying the virtual key alias when creating the guard.
|
||||
|
||||
Only the aliases of your virtual keys (and not the actual key secrets) will be sent to Aim.
|
||||
:::
|
||||
|
||||
### 3. Add Aim Guardrail on your LiteLLM config.yaml
|
||||
|
||||
Define your guardrails under the `guardrails` section
|
||||
|
|
|
@ -17,6 +17,14 @@ model_list:
|
|||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
guardrails:
|
||||
- guardrail_name: general-guard
|
||||
litellm_params:
|
||||
guardrail: aim
|
||||
mode: [pre_call, post_call]
|
||||
api_key: os.environ/AIM_API_KEY
|
||||
api_base: os.environ/AIM_API_BASE
|
||||
default_on: true # Optional
|
||||
|
||||
- guardrail_name: "aporia-pre-guard"
|
||||
litellm_params:
|
||||
guardrail: aporia # supported values: "aporia", "lakera"
|
||||
|
@ -45,6 +53,7 @@ guardrails:
|
|||
- `pre_call` Run **before** LLM call, on **input**
|
||||
- `post_call` Run **after** LLM call, on **input & output**
|
||||
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
|
||||
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`
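For example, a guardrail that should screen the input in parallel with the LLM call can use `during_call`. This is a sketch based on the Aim guardrail entry shown earlier; the guardrail name is arbitrary:

```yaml
guardrails:
  - guardrail_name: aim-during-call-guard
    litellm_params:
      guardrail: aim
      mode: during_call   # runs alongside the LLM call; the response waits for the check
      api_key: os.environ/AIM_API_KEY
      api_base: os.environ/AIM_API_BASE
```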
|
||||
|
||||
|
||||
## 2. Start LiteLLM Gateway
|
||||
|
@ -569,4 +578,4 @@ guardrails: Union[
|
|||
|
||||
class DynamicGuardrailParams:
|
||||
extra_body: Dict[str, Any] # Additional parameters for the guardrail
|
||||
```
|
||||
```
|
||||
|
|
BIN  docs/my-website/img/enterprise_vs_oss.png  Normal file
Binary file not shown. (61 KiB)
|
@ -137,15 +137,17 @@ const sidebars = {
|
|||
label: "[Beta] Guardrails",
|
||||
items: [
|
||||
"proxy/guardrails/quick_start",
|
||||
"proxy/guardrails/aim_security",
|
||||
"proxy/guardrails/aporia_api",
|
||||
"proxy/guardrails/bedrock",
|
||||
"proxy/guardrails/guardrails_ai",
|
||||
"proxy/guardrails/lakera_ai",
|
||||
"proxy/guardrails/pii_masking_v2",
|
||||
"proxy/guardrails/secret_detection",
|
||||
"proxy/guardrails/custom_guardrail",
|
||||
"prompt_injection"
|
||||
...[
|
||||
"proxy/guardrails/aim_security",
|
||||
"proxy/guardrails/aporia_api",
|
||||
"proxy/guardrails/bedrock",
|
||||
"proxy/guardrails/guardrails_ai",
|
||||
"proxy/guardrails/lakera_ai",
|
||||
"proxy/guardrails/pii_masking_v2",
|
||||
"proxy/guardrails/secret_detection",
|
||||
"proxy/guardrails/custom_guardrail",
|
||||
"proxy/guardrails/prompt_injection",
|
||||
].sort(),
|
||||
],
|
||||
},
|
||||
{
|
||||
|
|
BIN  litellm-proxy-extras/dist/litellm_proxy_extras-0.1.2-py3-none-any.whl  vendored  Normal file
Binary file not shown.
BIN  litellm-proxy-extras/dist/litellm_proxy_extras-0.1.2.tar.gz  vendored  Normal file
Binary file not shown.
|
@ -0,0 +1,4 @@
|
|||
-- AlterTable
|
||||
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "failed_requests" INTEGER NOT NULL DEFAULT 0,
|
||||
ADD COLUMN "successful_requests" INTEGER NOT NULL DEFAULT 0;
|
||||
|
7  litellm-proxy-extras/poetry.lock  generated  Normal file
|
@ -0,0 +1,7 @@
|
|||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
package = []
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||
content-hash = "2cf39473e67ff0615f0a61c9d2ac9f02b38cc08cbb1bdb893d89bee002646623"
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm-proxy-extras"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||
authors = ["BerriAI"]
|
||||
readme = "README.md"
|
||||
|
@ -22,7 +22,7 @@ requires = ["poetry-core"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
version_files = [
|
||||
"pyproject.toml:version",
|
||||
"../requirements.txt:litellm-proxy-extras==",
|
||||
|
|
|
@ -1038,6 +1038,7 @@ from .cost_calculator import response_cost_calculator, cost_per_token
|
|||
|
||||
### ADAPTERS ###
|
||||
from .types.adapter import AdapterItem
|
||||
import litellm.anthropic_interface as anthropic
|
||||
|
||||
adapters: List[AdapterItem] = []
|
||||
|
||||
|
|
|
@ -214,7 +214,7 @@ def _init_redis_sentinel(redis_kwargs) -> redis.Redis:
|
|||
|
||||
# Set up the Sentinel client
|
||||
sentinel = redis.Sentinel(
|
||||
sentinel_nodes,
|
||||
sentinel_nodes,
|
||||
socket_timeout=0.1,
|
||||
password=sentinel_password,
|
||||
)
|
||||
|
|
|
@ -3,4 +3,4 @@ import importlib_metadata
|
|||
try:
|
||||
version = importlib_metadata.version("litellm")
|
||||
except Exception:
|
||||
pass
|
||||
version = "unknown"
|
||||
|
|
6  litellm/anthropic_interface/__init__.py  Normal file
|
@ -0,0 +1,6 @@
|
|||
"""
|
||||
Anthropic module for LiteLLM
|
||||
"""
|
||||
from .messages import acreate, create
|
||||
|
||||
__all__ = ["acreate", "create"]
|
117  litellm/anthropic_interface/messages/__init__.py  Normal file
|
@ -0,0 +1,117 @@
|
|||
"""
|
||||
Interface for Anthropic's messages API
|
||||
|
||||
Use this to call LLMs in Anthropic /messages Request/Response format
|
||||
|
||||
This is an __init__.py file to allow the following interface
|
||||
|
||||
- litellm.messages.acreate
|
||||
- litellm.messages.create
|
||||
|
||||
"""
|
||||
|
||||
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from litellm.llms.anthropic.experimental_pass_through.messages.handler import (
|
||||
anthropic_messages as _async_anthropic_messages,
|
||||
)
|
||||
from litellm.types.llms.anthropic_messages.anthropic_response import (
|
||||
AnthropicMessagesResponse,
|
||||
)
|
||||
|
||||
|
||||
async def acreate(
|
||||
max_tokens: int,
|
||||
messages: List[Dict],
|
||||
model: str,
|
||||
metadata: Optional[Dict] = None,
|
||||
stop_sequences: Optional[List[str]] = None,
|
||||
stream: Optional[bool] = False,
|
||||
system: Optional[str] = None,
|
||||
temperature: Optional[float] = 1.0,
|
||||
thinking: Optional[Dict] = None,
|
||||
tool_choice: Optional[Dict] = None,
|
||||
tools: Optional[List[Dict]] = None,
|
||||
top_k: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
|
||||
"""
|
||||
Async wrapper for Anthropic's messages API
|
||||
|
||||
Args:
|
||||
max_tokens (int): Maximum tokens to generate (required)
|
||||
messages (List[Dict]): List of message objects with role and content (required)
|
||||
model (str): Model name to use (required)
|
||||
metadata (Dict, optional): Request metadata
|
||||
stop_sequences (List[str], optional): Custom stop sequences
|
||||
stream (bool, optional): Whether to stream the response
|
||||
system (str, optional): System prompt
|
||||
temperature (float, optional): Sampling temperature (0.0 to 1.0)
|
||||
thinking (Dict, optional): Extended thinking configuration
|
||||
tool_choice (Dict, optional): Tool choice configuration
|
||||
tools (List[Dict], optional): List of tool definitions
|
||||
top_k (int, optional): Top K sampling parameter
|
||||
top_p (float, optional): Nucleus sampling parameter
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
Dict: Response from the API
|
||||
"""
|
||||
return await _async_anthropic_messages(
|
||||
max_tokens=max_tokens,
|
||||
messages=messages,
|
||||
model=model,
|
||||
metadata=metadata,
|
||||
stop_sequences=stop_sequences,
|
||||
stream=stream,
|
||||
system=system,
|
||||
temperature=temperature,
|
||||
thinking=thinking,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_k=top_k,
|
||||
top_p=top_p,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
async def create(
|
||||
max_tokens: int,
|
||||
messages: List[Dict],
|
||||
model: str,
|
||||
metadata: Optional[Dict] = None,
|
||||
stop_sequences: Optional[List[str]] = None,
|
||||
stream: Optional[bool] = False,
|
||||
system: Optional[str] = None,
|
||||
temperature: Optional[float] = 1.0,
|
||||
thinking: Optional[Dict] = None,
|
||||
tool_choice: Optional[Dict] = None,
|
||||
tools: Optional[List[Dict]] = None,
|
||||
top_k: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
**kwargs
|
||||
) -> Union[AnthropicMessagesResponse, Iterator]:
|
||||
"""
|
||||
Async wrapper for Anthropic's messages API
|
||||
|
||||
Args:
|
||||
max_tokens (int): Maximum tokens to generate (required)
|
||||
messages (List[Dict]): List of message objects with role and content (required)
|
||||
model (str): Model name to use (required)
|
||||
metadata (Dict, optional): Request metadata
|
||||
stop_sequences (List[str], optional): Custom stop sequences
|
||||
stream (bool, optional): Whether to stream the response
|
||||
system (str, optional): System prompt
|
||||
temperature (float, optional): Sampling temperature (0.0 to 1.0)
|
||||
thinking (Dict, optional): Extended thinking configuration
|
||||
tool_choice (Dict, optional): Tool choice configuration
|
||||
tools (List[Dict], optional): List of tool definitions
|
||||
top_k (int, optional): Top K sampling parameter
|
||||
top_p (float, optional): Nucleus sampling parameter
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
Dict: Response from the API
|
||||
"""
|
||||
raise NotImplementedError("This function is not implemented")
|
116  litellm/anthropic_interface/readme.md  Normal file
|
@ -0,0 +1,116 @@
|
|||
## Use LLM API endpoints in Anthropic Interface
|
||||
|
||||
Note: This is called `anthropic_interface` because `anthropic` is a known python package and was failing mypy type checking.
|
||||
|
||||
|
||||
## Usage
|
||||
---
|
||||
|
||||
### LiteLLM Python SDK
|
||||
|
||||
#### Non-streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
|
||||
Example response:
|
||||
```json
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "Hi! this is a very short joke",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"role": "assistant",
|
||||
"stop_reason": "end_turn",
|
||||
"stop_sequence": null,
|
||||
"type": "message",
|
||||
"usage": {
|
||||
"input_tokens": 2095,
|
||||
"output_tokens": 503,
|
||||
"cache_creation_input_tokens": 2095,
|
||||
"cache_read_input_tokens": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Streaming example
|
||||
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||
import litellm
|
||||
response = await litellm.anthropic.messages.acreate(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
api_key=api_key,
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
stream=True,
|
||||
)
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
```
|
||||
|
||||
### LiteLLM Proxy Server
|
||||
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: anthropic-claude
|
||||
litellm_params:
|
||||
model: claude-3-7-sonnet-latest
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="Anthropic Python SDK" value="python">
|
||||
|
||||
```python showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
import anthropic
|
||||
|
||||
# point anthropic sdk to litellm proxy
|
||||
client = anthropic.Anthropic(
|
||||
base_url="http://0.0.0.0:4000",
|
||||
api_key="sk-1234",
|
||||
)
|
||||
|
||||
response = client.messages.create(
|
||||
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||
model="anthropic/claude-3-haiku-20240307",
|
||||
max_tokens=100,
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem label="curl" value="curl">
|
||||
|
||||
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||
-H 'content-type: application/json' \
|
||||
-H 'x-api-key: $LITELLM_API_KEY' \
|
||||
-H 'anthropic-version: 2023-06-01' \
|
||||
-d '{
|
||||
"model": "anthropic-claude",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, can you tell me a short joke?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100
|
||||
}'
|
||||
```
|
|
@ -19,6 +19,7 @@ DEFAULT_IMAGE_HEIGHT = 300
|
|||
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
|
||||
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
||||
REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
|
||||
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
|
||||
MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
|
||||
#### RELIABILITY ####
|
||||
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
|
||||
|
|
|
@ -550,6 +550,7 @@ def completion_cost( # noqa: PLR0915
|
|||
custom_pricing: Optional[bool] = None,
|
||||
base_model: Optional[str] = None,
|
||||
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.
|
||||
|
@ -602,7 +603,7 @@ def completion_cost( # noqa: PLR0915
|
|||
completion_response=completion_response
|
||||
)
|
||||
rerank_billed_units: Optional[RerankBilledUnits] = None
|
||||
model = _select_model_name_for_cost_calc(
|
||||
selected_model = _select_model_name_for_cost_calc(
|
||||
model=model,
|
||||
completion_response=completion_response,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
|
@ -610,232 +611,268 @@ def completion_cost( # noqa: PLR0915
|
|||
base_model=base_model,
|
||||
)
|
||||
|
||||
verbose_logger.info(f"selected model name for cost calculation: {model}")
|
||||
potential_model_names = [selected_model]
|
||||
if model is not None:
|
||||
potential_model_names.append(model)
|
||||
|
||||
if completion_response is not None and (
|
||||
isinstance(completion_response, BaseModel)
|
||||
or isinstance(completion_response, dict)
|
||||
): # tts returns a custom class
|
||||
if isinstance(completion_response, dict):
|
||||
usage_obj: Optional[Union[dict, Usage]] = completion_response.get(
|
||||
"usage", {}
|
||||
)
|
||||
else:
|
||||
usage_obj = getattr(completion_response, "usage", {})
|
||||
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
|
||||
usage_obj=usage_obj
|
||||
):
|
||||
setattr(
|
||||
completion_response,
|
||||
"usage",
|
||||
litellm.Usage(**usage_obj.model_dump()),
|
||||
)
|
||||
if usage_obj is None:
|
||||
_usage = {}
|
||||
elif isinstance(usage_obj, BaseModel):
|
||||
_usage = usage_obj.model_dump()
|
||||
else:
|
||||
_usage = usage_obj
|
||||
|
||||
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
|
||||
_usage = (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
_usage
|
||||
).model_dump()
|
||||
)
|
||||
|
||||
# get input/output tokens from completion_response
|
||||
prompt_tokens = _usage.get("prompt_tokens", 0)
|
||||
completion_tokens = _usage.get("completion_tokens", 0)
|
||||
cache_creation_input_tokens = _usage.get("cache_creation_input_tokens", 0)
|
||||
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
|
||||
if (
|
||||
"prompt_tokens_details" in _usage
|
||||
and _usage["prompt_tokens_details"] != {}
|
||||
and _usage["prompt_tokens_details"]
|
||||
):
|
||||
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
|
||||
cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
|
||||
|
||||
total_time = getattr(completion_response, "_response_ms", 0)
|
||||
|
||||
hidden_params = getattr(completion_response, "_hidden_params", None)
|
||||
if hidden_params is not None:
|
||||
custom_llm_provider = hidden_params.get(
|
||||
"custom_llm_provider", custom_llm_provider or None
|
||||
)
|
||||
region_name = hidden_params.get("region_name", region_name)
|
||||
size = hidden_params.get("optional_params", {}).get(
|
||||
"size", "1024-x-1024"
|
||||
) # openai default
|
||||
quality = hidden_params.get("optional_params", {}).get(
|
||||
"quality", "standard"
|
||||
) # openai default
|
||||
n = hidden_params.get("optional_params", {}).get(
|
||||
"n", 1
|
||||
) # openai default
|
||||
else:
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
if len(messages) > 0:
|
||||
prompt_tokens = token_counter(model=model, messages=messages)
|
||||
elif len(prompt) > 0:
|
||||
prompt_tokens = token_counter(model=model, text=prompt)
|
||||
completion_tokens = token_counter(model=model, text=completion)
|
||||
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
if custom_llm_provider is None:
|
||||
for idx, model in enumerate(potential_model_names):
|
||||
try:
|
||||
model, custom_llm_provider, _, _ = litellm.get_llm_provider(
|
||||
model=model
|
||||
) # strip the llm provider from the model name -> for image gen cost calculation
|
||||
verbose_logger.info(
|
||||
f"selected model name for cost calculation: {model}"
|
||||
)
|
||||
|
||||
if completion_response is not None and (
|
||||
isinstance(completion_response, BaseModel)
|
||||
or isinstance(completion_response, dict)
|
||||
): # tts returns a custom class
|
||||
if isinstance(completion_response, dict):
|
||||
usage_obj: Optional[
|
||||
Union[dict, Usage]
|
||||
] = completion_response.get("usage", {})
|
||||
else:
|
||||
usage_obj = getattr(completion_response, "usage", {})
|
||||
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
|
||||
usage_obj=usage_obj
|
||||
):
|
||||
setattr(
|
||||
completion_response,
|
||||
"usage",
|
||||
litellm.Usage(**usage_obj.model_dump()),
|
||||
)
|
||||
if usage_obj is None:
|
||||
_usage = {}
|
||||
elif isinstance(usage_obj, BaseModel):
|
||||
_usage = usage_obj.model_dump()
|
||||
else:
|
||||
_usage = usage_obj
|
||||
|
||||
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
|
||||
_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
_usage
|
||||
).model_dump()
|
||||
|
||||
# get input/output tokens from completion_response
|
||||
prompt_tokens = _usage.get("prompt_tokens", 0)
|
||||
completion_tokens = _usage.get("completion_tokens", 0)
|
||||
cache_creation_input_tokens = _usage.get(
|
||||
"cache_creation_input_tokens", 0
|
||||
)
|
||||
cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
|
||||
if (
|
||||
"prompt_tokens_details" in _usage
|
||||
and _usage["prompt_tokens_details"] != {}
|
||||
and _usage["prompt_tokens_details"]
|
||||
):
|
||||
prompt_tokens_details = _usage.get("prompt_tokens_details", {})
|
||||
cache_read_input_tokens = prompt_tokens_details.get(
|
||||
"cached_tokens", 0
|
||||
)
|
||||
|
||||
total_time = getattr(completion_response, "_response_ms", 0)
|
||||
|
||||
hidden_params = getattr(completion_response, "_hidden_params", None)
|
||||
if hidden_params is not None:
|
||||
custom_llm_provider = hidden_params.get(
|
||||
"custom_llm_provider", custom_llm_provider or None
|
||||
)
|
||||
region_name = hidden_params.get("region_name", region_name)
|
||||
size = hidden_params.get("optional_params", {}).get(
|
||||
"size", "1024-x-1024"
|
||||
) # openai default
|
||||
quality = hidden_params.get("optional_params", {}).get(
|
||||
"quality", "standard"
|
||||
) # openai default
|
||||
n = hidden_params.get("optional_params", {}).get(
|
||||
"n", 1
|
||||
) # openai default
|
||||
else:
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
if len(messages) > 0:
|
||||
prompt_tokens = token_counter(model=model, messages=messages)
|
||||
elif len(prompt) > 0:
|
||||
prompt_tokens = token_counter(model=model, text=prompt)
|
||||
completion_tokens = token_counter(model=model, text=completion)
|
||||
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
if custom_llm_provider is None:
|
||||
try:
|
||||
model, custom_llm_provider, _, _ = litellm.get_llm_provider(
|
||||
model=model
|
||||
) # strip the llm provider from the model name -> for image gen cost calculation
|
||||
except Exception as e:
|
||||
verbose_logger.debug(
|
||||
"litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
if (
|
||||
call_type == CallTypes.image_generation.value
|
||||
or call_type == CallTypes.aimage_generation.value
|
||||
or call_type
|
||||
== PassthroughCallTypes.passthrough_image_generation.value
|
||||
):
|
||||
### IMAGE GENERATION COST CALCULATION ###
|
||||
if custom_llm_provider == "vertex_ai":
|
||||
if isinstance(completion_response, ImageResponse):
|
||||
return vertex_ai_image_cost_calculator(
|
||||
model=model,
|
||||
image_response=completion_response,
|
||||
)
|
||||
elif custom_llm_provider == "bedrock":
|
||||
if isinstance(completion_response, ImageResponse):
|
||||
return bedrock_image_cost_calculator(
|
||||
model=model,
|
||||
size=size,
|
||||
image_response=completion_response,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
raise TypeError(
|
||||
"completion_response must be of type ImageResponse for bedrock image cost calculation"
|
||||
)
|
||||
else:
|
||||
return default_image_cost_calculator(
|
||||
model=model,
|
||||
quality=quality,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
n=n,
|
||||
size=size,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
elif (
|
||||
call_type == CallTypes.speech.value
|
||||
or call_type == CallTypes.aspeech.value
|
||||
):
|
||||
prompt_characters = litellm.utils._count_characters(text=prompt)
|
||||
elif (
|
||||
call_type == CallTypes.atranscription.value
|
||||
or call_type == CallTypes.transcription.value
|
||||
):
|
||||
audio_transcription_file_duration = getattr(
|
||||
completion_response, "duration", 0.0
|
||||
)
|
||||
elif (
|
||||
call_type == CallTypes.rerank.value
|
||||
or call_type == CallTypes.arerank.value
|
||||
):
|
||||
if completion_response is not None and isinstance(
|
||||
completion_response, RerankResponse
|
||||
):
|
||||
meta_obj = completion_response.meta
|
||||
if meta_obj is not None:
|
||||
billed_units = meta_obj.get("billed_units", {}) or {}
|
||||
else:
|
||||
billed_units = {}
|
||||
|
||||
rerank_billed_units = RerankBilledUnits(
|
||||
search_units=billed_units.get("search_units"),
|
||||
total_tokens=billed_units.get("total_tokens"),
|
||||
)
|
||||
|
||||
search_units = (
|
||||
billed_units.get("search_units") or 1
|
||||
) # cohere charges per request by default.
|
||||
completion_tokens = search_units
|
||||
# Calculate cost based on prompt_tokens, completion_tokens
|
||||
if (
|
||||
"togethercomputer" in model
|
||||
or "together_ai" in model
|
||||
or custom_llm_provider == "together_ai"
|
||||
):
|
||||
# together ai prices based on size of llm
|
||||
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
|
||||
|
||||
model = get_model_params_and_category(
|
||||
model, call_type=CallTypes(call_type)
|
||||
)
|
||||
|
||||
# replicate llms are calculate based on time for request running
|
||||
# see https://replicate.com/pricing
|
||||
elif (
|
||||
model in litellm.replicate_models or "replicate" in model
|
||||
) and model not in litellm.model_cost:
|
||||
# for unmapped replicate model, default to replicate's time tracking logic
|
||||
return get_replicate_completion_pricing(completion_response, total_time) # type: ignore
|
||||
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
|
||||
if (
|
||||
custom_llm_provider is not None
|
||||
and custom_llm_provider == "vertex_ai"
|
||||
):
|
||||
# Calculate the prompt characters + response characters
|
||||
if len(messages) > 0:
|
||||
prompt_string = litellm.utils.get_formatted_prompt(
|
||||
data={"messages": messages}, call_type="completion"
|
||||
)
|
||||
|
||||
prompt_characters = litellm.utils._count_characters(
|
||||
text=prompt_string
|
||||
)
|
||||
if completion_response is not None and isinstance(
|
||||
completion_response, ModelResponse
|
||||
):
|
||||
completion_string = litellm.utils.get_response_string(
|
||||
response_obj=completion_response
|
||||
)
|
||||
completion_characters = litellm.utils._count_characters(
|
||||
text=completion_string
|
||||
)
|
||||
|
||||
(
|
||||
prompt_tokens_cost_usd_dollar,
|
||||
completion_tokens_cost_usd_dollar,
|
||||
) = cost_per_token(
|
||||
model=model,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
response_time_ms=total_time,
|
||||
region_name=region_name,
|
||||
custom_cost_per_second=custom_cost_per_second,
|
||||
custom_cost_per_token=custom_cost_per_token,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
cache_creation_input_tokens=cache_creation_input_tokens,
|
||||
cache_read_input_tokens=cache_read_input_tokens,
|
||||
usage_object=cost_per_token_usage_object,
|
||||
call_type=cast(CallTypesLiteral, call_type),
|
||||
audio_transcription_file_duration=audio_transcription_file_duration,
|
||||
rerank_billed_units=rerank_billed_units,
|
||||
)
|
||||
_final_cost = (
|
||||
prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
|
||||
)
|
||||
_final_cost += (
|
||||
StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
|
||||
model=model,
|
||||
response_object=completion_response,
|
||||
standard_built_in_tools_params=standard_built_in_tools_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
)
|
||||
return _final_cost
|
||||
except Exception as e:
|
||||
verbose_logger.debug(
|
||||
"litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
|
||||
str(e)
|
||||
"litellm.cost_calculator.py::completion_cost() - Error calculating cost for model={} - {}".format(
|
||||
model, str(e)
|
||||
)
|
||||
)
|
||||
if (
|
||||
call_type == CallTypes.image_generation.value
|
||||
or call_type == CallTypes.aimage_generation.value
|
||||
or call_type == PassthroughCallTypes.passthrough_image_generation.value
|
||||
):
|
||||
### IMAGE GENERATION COST CALCULATION ###
|
||||
if custom_llm_provider == "vertex_ai":
|
||||
if isinstance(completion_response, ImageResponse):
|
||||
return vertex_ai_image_cost_calculator(
|
||||
model=model,
|
||||
image_response=completion_response,
|
||||
)
|
||||
elif custom_llm_provider == "bedrock":
|
||||
if isinstance(completion_response, ImageResponse):
|
||||
return bedrock_image_cost_calculator(
|
||||
model=model,
|
||||
size=size,
|
||||
image_response=completion_response,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
raise TypeError(
|
||||
"completion_response must be of type ImageResponse for bedrock image cost calculation"
|
||||
)
|
||||
else:
|
||||
return default_image_cost_calculator(
|
||||
model=model,
|
||||
quality=quality,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
n=n,
|
||||
size=size,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
elif (
|
||||
call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
|
||||
):
|
||||
prompt_characters = litellm.utils._count_characters(text=prompt)
|
||||
elif (
|
||||
call_type == CallTypes.atranscription.value
|
||||
or call_type == CallTypes.transcription.value
|
||||
):
|
||||
audio_transcription_file_duration = getattr(
|
||||
completion_response, "duration", 0.0
|
||||
if idx == len(potential_model_names) - 1:
|
||||
raise e
|
||||
raise Exception(
|
||||
"Unable to calculat cost for received potential model names - {}".format(
|
||||
potential_model_names
|
||||
)
|
||||
elif (
|
||||
call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
|
||||
):
|
||||
if completion_response is not None and isinstance(
|
||||
completion_response, RerankResponse
|
||||
):
|
||||
meta_obj = completion_response.meta
|
||||
if meta_obj is not None:
|
||||
billed_units = meta_obj.get("billed_units", {}) or {}
|
||||
else:
|
||||
billed_units = {}
|
||||
|
||||
rerank_billed_units = RerankBilledUnits(
|
||||
search_units=billed_units.get("search_units"),
|
||||
total_tokens=billed_units.get("total_tokens"),
|
||||
)
|
||||
|
||||
search_units = (
|
||||
billed_units.get("search_units") or 1
|
||||
) # cohere charges per request by default.
|
||||
completion_tokens = search_units
|
||||
# Calculate cost based on prompt_tokens, completion_tokens
|
||||
if (
|
||||
"togethercomputer" in model
|
||||
or "together_ai" in model
|
||||
or custom_llm_provider == "together_ai"
|
||||
):
|
||||
# together ai prices based on size of llm
|
||||
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
|
||||
|
||||
model = get_model_params_and_category(model, call_type=CallTypes(call_type))
|
||||
|
||||
# replicate llms are calculate based on time for request running
|
||||
# see https://replicate.com/pricing
|
||||
elif (
|
||||
model in litellm.replicate_models or "replicate" in model
|
||||
) and model not in litellm.model_cost:
|
||||
# for unmapped replicate model, default to replicate's time tracking logic
|
||||
return get_replicate_completion_pricing(completion_response, total_time) # type: ignore
|
||||
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||
)
|
||||
|
||||
if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
|
||||
# Calculate the prompt characters + response characters
|
||||
if len(messages) > 0:
|
||||
prompt_string = litellm.utils.get_formatted_prompt(
|
||||
data={"messages": messages}, call_type="completion"
|
||||
)
|
||||
|
||||
prompt_characters = litellm.utils._count_characters(text=prompt_string)
|
||||
if completion_response is not None and isinstance(
|
||||
completion_response, ModelResponse
|
||||
):
|
||||
completion_string = litellm.utils.get_response_string(
|
||||
response_obj=completion_response
|
||||
)
|
||||
completion_characters = litellm.utils._count_characters(
|
||||
text=completion_string
|
||||
)
|
||||
|
||||
(
|
||||
prompt_tokens_cost_usd_dollar,
|
||||
completion_tokens_cost_usd_dollar,
|
||||
) = cost_per_token(
|
||||
model=model,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
response_time_ms=total_time,
|
||||
region_name=region_name,
|
||||
custom_cost_per_second=custom_cost_per_second,
|
||||
custom_cost_per_token=custom_cost_per_token,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
cache_creation_input_tokens=cache_creation_input_tokens,
|
||||
cache_read_input_tokens=cache_read_input_tokens,
|
||||
usage_object=cost_per_token_usage_object,
|
||||
call_type=call_type,
|
||||
audio_transcription_file_duration=audio_transcription_file_duration,
|
||||
rerank_billed_units=rerank_billed_units,
|
||||
)
|
||||
_final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
|
||||
_final_cost += StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
|
||||
model=model,
|
||||
response_object=completion_response,
|
||||
standard_built_in_tools_params=standard_built_in_tools_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
|
||||
return _final_cost
|
||||
except Exception as e:
|
||||
raise e
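The refactor above wraps the per-model cost computation in a loop over `potential_model_names`, falling back to the next candidate when one fails and re-raising only on the last. A minimal sketch of that fallback pattern, independent of litellm internals (`compute_cost` stands in for the loop body):

```python
from typing import Callable, List


def cost_with_fallback(candidates: List[str], compute_cost: Callable[[str], float]) -> float:
    """Try each candidate model name in order; re-raise only if the last one fails."""
    for idx, model_name in enumerate(candidates):
        try:
            return compute_cost(model_name)
        except Exception:
            # Swallow the error unless this was the final candidate.
            if idx == len(candidates) - 1:
                raise
    raise RuntimeError(f"Unable to calculate cost for candidates: {candidates}")
```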
|
||||
|
||||
|
@ -897,6 +934,7 @@ def response_cost_calculator(
|
|||
custom_pricing: Optional[bool] = None,
|
||||
prompt: str = "",
|
||||
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Returns
|
||||
|
|
|
@ -290,6 +290,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
"input": _input,
|
||||
"litellm_params": litellm_params,
|
||||
"applied_guardrails": applied_guardrails,
|
||||
"model": model,
|
||||
}
|
||||
|
||||
def process_dynamic_callbacks(self):
|
||||
|
@ -892,6 +893,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
ResponseCompletedEvent,
|
||||
],
|
||||
cache_hit: Optional[bool] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
) -> Optional[float]:
|
||||
"""
|
||||
Calculate response cost using result + logging object variables.
|
||||
|
@ -917,7 +919,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
try:
|
||||
response_cost_calculator_kwargs = {
|
||||
"response_object": result,
|
||||
"model": self.model,
|
||||
"model": litellm_model_name or self.model,
|
||||
"cache_hit": cache_hit,
|
||||
"custom_llm_provider": self.model_call_details.get(
|
||||
"custom_llm_provider", None
|
||||
|
@ -1009,6 +1011,10 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
return False
|
||||
return True
|
||||
|
||||
def _update_completion_start_time(self, completion_start_time: datetime.datetime):
|
||||
self.completion_start_time = completion_start_time
|
||||
self.model_call_details["completion_start_time"] = self.completion_start_time
|
||||
|
||||
def _success_handler_helper_fn(
|
||||
self,
|
||||
result=None,
|
||||
|
|
|
@@ -22,6 +22,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionFileObject,
ChatCompletionFunctionMessage,
ChatCompletionImageObject,
ChatCompletionTextObject,

@@ -1455,6 +1456,25 @@ def anthropic_messages_pt( # noqa: PLR0915
user_content.append(_content_element)
elif m.get("type", "") == "document":
user_content.append(cast(AnthropicMessagesDocumentParam, m))
elif m.get("type", "") == "file":
file_message = cast(ChatCompletionFileObject, m)
file_data = file_message["file"].get("file_data")
if file_data:
image_chunk = convert_to_anthropic_image_obj(
openai_image_url=file_data,
format=file_message["file"].get("format"),
)
anthropic_document_param = (
AnthropicMessagesDocumentParam(
type="document",
source=AnthropicContentParamSource(
type="base64",
media_type=image_chunk["media_type"],
data=image_chunk["data"],
),
)
)
user_content.append(anthropic_document_param)
elif isinstance(user_message_types_block["content"], str):
_anthropic_content_text_element: AnthropicMessagesTextParam = {
"type": "text",

@@ -2885,6 +2905,11 @@ class BedrockConverseMessagesProcessor:
image_url=image_url, format=format
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = await BedrockConverseMessagesProcessor._async_process_file_message(
message=cast(ChatCompletionFileObject, element)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(

@@ -3054,6 +3079,45 @@ class BedrockConverseMessagesProcessor:
reasoning_content_blocks.append(bedrock_content_block)
return reasoning_content_blocks

@staticmethod
def _process_file_message(message: ChatCompletionFileObject) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")

if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
format = file_message.get("format")
return BedrockImageProcessor.process_image_sync(
image_url=cast(str, file_id or file_data), format=format
)

@staticmethod
async def _async_process_file_message(
message: ChatCompletionFileObject,
) -> BedrockContentBlock:
file_message = message["file"]
file_data = file_message.get("file_data")
file_id = file_message.get("file_id")
format = file_message.get("format")
if file_data is None and file_id is None:
raise litellm.BadRequestError(
message="file_data and file_id cannot both be None. Got={}".format(
message
),
model="",
llm_provider="bedrock",
)
return await BedrockImageProcessor.process_image_async(
image_url=cast(str, file_id or file_data), format=format
)


def _bedrock_converse_messages_pt( # noqa: PLR0915
messages: List,

@@ -3126,6 +3190,13 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
format=format,
)
_parts.append(_part) # type: ignore
elif element["type"] == "file":
_part = (
BedrockConverseMessagesProcessor._process_file_message(
message=cast(ChatCompletionFileObject, element)
)
)
_parts.append(_part)
_cache_point_block = (
litellm.AmazonConverseConfig()._get_cache_point_block(
message_block=cast(
@@ -1,5 +1,6 @@
import asyncio
import collections.abc
import datetime
import json
import threading
import time

@@ -1567,6 +1568,10 @@ class CustomStreamWrapper:

if response is None:
continue
if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)
## LOGGING
executor.submit(
self.run_success_logging_and_cache_storage,

@@ -1721,6 +1726,11 @@ class CustomStreamWrapper:
if processed_chunk is None:
continue

if self.logging_obj.completion_start_time is None:
self.logging_obj._update_completion_start_time(
completion_start_time=datetime.datetime.now()
)

choice = processed_chunk.choices[0]
if isinstance(choice, StreamingChoices):
self.response_uptil_now += choice.delta.get("content", "") or ""
@@ -18,8 +18,10 @@ from litellm.types.llms.anthropic import (
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionSystemMessage,

@@ -94,6 +96,7 @@ class AnthropicConfig(BaseConfig):
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]

if "claude-3-7-sonnet" in model:

@@ -141,15 +144,9 @@ class AnthropicConfig(BaseConfig):
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)

# Handle beta headers for Vertex AI
# We allow prompt caching beta header for Vertex, but exclude other beta headers that might cause issues
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
vertex_safe_betas = set()
# Allow prompt caching beta header for Vertex
if "prompt-caching-2024-07-31" in betas:
vertex_safe_betas.add("prompt-caching-2024-07-31")
if len(vertex_safe_betas) > 0:
headers["anthropic-beta"] = ",".join(vertex_safe_betas)
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)

@@ -297,6 +294,21 @@ class AnthropicConfig(BaseConfig):
new_stop = new_v
return new_stop

@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
elif reasoning_effort == "high":
return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")

def map_openai_params(
self,
non_default_params: dict,

@@ -308,10 +320,6 @@ class AnthropicConfig(BaseConfig):
non_default_params=non_default_params
)

## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value

@@ -376,7 +384,15 @@ class AnthropicConfig(BaseConfig):
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)

## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params

def _create_json_tool_call_for_response_format(
@@ -6,7 +6,7 @@
"""

import json
from typing import Any, AsyncIterator, Dict, Optional, Union, cast
from typing import AsyncIterator, Dict, List, Optional, Union, cast

import httpx

@@ -19,6 +19,9 @@ from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client

@@ -60,14 +63,25 @@ class AnthropicMessagesHandler:

@client
async def anthropic_messages(
api_key: str,
max_tokens: int,
messages: List[Dict],
model: str,
stream: bool = False,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = None,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[Dict[str, Any], AsyncIterator]:
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec
"""

@@ -129,10 +143,8 @@ async def anthropic_messages(
},
custom_llm_provider=_custom_llm_provider,
)
litellm_logging_obj.model_call_details.update(kwargs)

# Prepare request body
request_body = kwargs.copy()
request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()

@@ -140,10 +152,12 @@ async def anthropic_messages(
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
litellm_logging_obj.model_call_details.update(request_body)

# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(

@@ -164,7 +178,7 @@ async def anthropic_messages(
url=request_url,
headers=headers,
data=json.dumps(request_body),
stream=stream,
stream=stream or False,
)
response.raise_for_status()
@@ -104,7 +104,10 @@ class BaseConfig(ABC):
return type_to_response_format_param(response_format=response_format)

def is_thinking_enabled(self, non_default_params: dict) -> bool:
return non_default_params.get("thinking", {}).get("type", None) == "enabled"
return (
non_default_params.get("thinking", {}).get("type") == "enabled"
or non_default_params.get("reasoning_effort") is not None
)

def update_optional_params_with_thinking_tokens(
self, non_default_params: dict, optional_params: dict

@@ -116,9 +119,9 @@ class BaseConfig(ABC):

if 'thinking' is enabled and 'max_tokens' is not specified, set 'max_tokens' to the thinking token budget + DEFAULT_MAX_TOKENS
"""
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
is_thinking_enabled = self.is_thinking_enabled(optional_params)
if is_thinking_enabled and "max_tokens" not in non_default_params:
thinking_token_budget = cast(dict, non_default_params["thinking"]).get(
thinking_token_budget = cast(dict, optional_params["thinking"]).get(
"budget_tokens", None
)
if thinking_token_budget is not None:
@@ -17,6 +17,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
_bedrock_converse_messages_pt,
_bedrock_tools_pt,
)
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (

@@ -128,6 +129,7 @@ class AmazonConverseConfig(BaseConfig):
"claude-3-7" in model
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
supported_params.append("thinking")
supported_params.append("reasoning_effort")
return supported_params

def map_tool_choice_values(

@@ -218,9 +220,7 @@ class AmazonConverseConfig(BaseConfig):
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(non_default_params)
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)

for param, value in non_default_params.items():
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]

@@ -297,6 +297,14 @@ class AmazonConverseConfig(BaseConfig):
optional_params["tool_choice"] = _tool_choice_value
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)

self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)

return optional_params
@@ -12,6 +12,7 @@ import httpx

from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.openrouter import OpenRouterErrorMessage
from litellm.types.utils import ModelResponse, ModelResponseStream

from ...openai.chat.gpt_transformation import OpenAIGPTConfig

@@ -71,6 +72,24 @@ class OpenrouterConfig(OpenAIGPTConfig):
class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
## HANDLE ERROR IN CHUNK ##
if "error" in chunk:
error_chunk = chunk["error"]
error_message = OpenRouterErrorMessage(
message="Message: {}, Metadata: {}, User ID: {}".format(
error_chunk["message"],
error_chunk.get("metadata", {}),
error_chunk.get("user_id", ""),
),
code=error_chunk["code"],
metadata=error_chunk.get("metadata", {}),
)
raise OpenRouterException(
message=error_message["message"],
status_code=error_message["code"],
headers=error_message["metadata"].get("headers", {}),
)

new_choices = []
for choice in chunk["choices"]:
choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
@@ -127,21 +127,25 @@ class AWSEventStreamDecoder:
async for chunk in iterator:
event_stream_buffer.add_data(chunk)
for event in event_stream_buffer:
message = self._parse_message_from_event(event)
if message:
verbose_logger.debug("sagemaker parsed chunk bytes %s", message)
# remove data: prefix and "\n\n" at the end
message = (
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
or ""
)
message = message.replace("\n\n", "")
try:
message = self._parse_message_from_event(event)
if message:
verbose_logger.debug(
"sagemaker parsed chunk bytes %s", message
)
# remove data: prefix and "\n\n" at the end
message = (
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(
message
)
or ""
)
message = message.replace("\n\n", "")

# Accumulate JSON data
accumulated_json += message
# Accumulate JSON data
accumulated_json += message

# Try to parse the accumulated JSON
try:
# Try to parse the accumulated JSON
_data = json.loads(accumulated_json)
if self.is_messages_api:
yield self._chunk_parser_messages_api(chunk_data=_data)

@@ -149,9 +153,19 @@ class AWSEventStreamDecoder:
yield self._chunk_parser(chunk_data=_data)
# Reset accumulated_json after successful parsing
accumulated_json = ""
except json.JSONDecodeError:
# If it's not valid JSON yet, continue to the next event
continue
except json.JSONDecodeError:
# If it's not valid JSON yet, continue to the next event
continue
except UnicodeDecodeError as e:
verbose_logger.warning(
f"UnicodeDecodeError: {e}. Attempting to combine with next event."
)
continue
except Exception as e:
verbose_logger.error(
f"Error parsing message: {e}. Attempting to combine with next event."
)
continue

# Handle any remaining data after the iterator is exhausted
if accumulated_json:

@@ -167,6 +181,8 @@ class AWSEventStreamDecoder:
f"Warning: Unparseable JSON data remained: {accumulated_json}"
)
yield None
except Exception as e:
verbose_logger.error(f"Final error parsing accumulated JSON: {e}")

def _parse_message_from_event(self, event) -> Optional[str]:
response_dict = event.to_response_dict()
@@ -4453,6 +4453,42 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,

@@ -10189,6 +10225,22 @@
"litellm_provider": "voyage",
"mode": "rerank"
},
"databricks/databricks-claude-3-7-sonnet": {
"max_tokens": 200000,
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000025,
"input_dbu_cost_per_token": 0.00003571,
"output_cost_per_token": 0.00017857,
"output_db_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_tool_choice": true
},
"databricks/databricks-meta-llama-3-1-405b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,

@@ -10217,7 +10269,7 @@
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
"supports_tool_choice": true
},
"databricks/meta-llama-3.3-70b-instruct": {
"databricks/databricks-meta-llama-3-3-70b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
@@ -3,7 +3,7 @@ MCP Client Manager

This class is responsible for managing MCP SSE clients.

This is a Proxy
This is a Proxy
"""

import asyncio
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/1f6915676624c422.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[38411,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"274\",\"static/chunks/274-bddaf0cf6c91e72f.js\",\"250\",\"static/chunks/250-dfc03a6fb4f0d254.js\",\"699\",\"static/chunks/699-87224ecba28f1f48.js\",\"931\",\"static/chunks/app/page-0f46d4a8b9bdf1c0.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Yb50LG5p7c9QpG54GIoFV\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/1f6915676624c422.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1]
3:I[38411,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","274","static/chunks/274-bddaf0cf6c91e72f.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","931","static/chunks/app/page-0f46d4a8b9bdf1c0.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-dfc03a6fb4f0d254.js","699","static/chunks/699-87224ecba28f1f48.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-601568e45a5ffece.js","461","static/chunks/app/onboarding/page-5110f2c6a3c9a2f4.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-1cbed529ecb084e0.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-dfc03a6fb4f0d254.js","461","static/chunks/app/onboarding/page-2bf7a26db5342dbf.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["Yb50LG5p7c9QpG54GIoFV",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/1f6915676624c422.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,33 +1,39 @@
model_list:
  - model_name: "gpt-4o"
    litellm_params:
      model: azure/chatgpt-v-2
      api_key: os.environ/AZURE_API_KEY
      api_base: http://0.0.0.0:8090
      rpm: 3
  - model_name: "gpt-4o-mini-openai"
    litellm_params:
      model: gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
  - model_name: "openai/*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  - model_name: "bedrock-nova"
    litellm_params:
      model: us.amazon.nova-pro-v1:0
  - model_name: "gemini-2.0-flash"
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
  - model_name: "gpt-4o"
    litellm_params:
      model: azure/chatgpt-v-2
      api_key: os.environ/AZURE_API_KEY
      api_base: http://0.0.0.0:8090
      rpm: 3
  - model_name: "gpt-4o-mini-openai"
    litellm_params:
      model: gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
  - model_name: "openai/*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  - model_name: "bedrock-nova"
    litellm_params:
      model: us.amazon.nova-pro-v1:0
  - model_name: "gemini-2.0-flash"
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
  - model_name: openrouter_model
    litellm_params:
      model: openrouter/openrouter_model
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: http://0.0.0.0:8090


litellm_settings:
  num_retries: 0
  callbacks: ["prometheus"]
  # json_logs: true

# router_settings:
#   routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
#   redis_host: os.environ/REDIS_HOST
#   redis_password: os.environ/REDIS_PASSWORD
#   redis_port: os.environ/REDIS_PORT
router_settings:
  routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
@@ -432,6 +432,7 @@ class LiteLLMRoutes(enum.Enum):
"/model/new",
"/model/update",
"/model/delete",
"/user/daily/activity",
] # routes that manage their own allowed/disallowed logic

## Org Admin Routes ##

@@ -2736,6 +2737,8 @@ class DailyUserSpendTransaction(TypedDict):
completion_tokens: int
spend: float
api_requests: int
successful_requests: int
failed_requests: int


class DBSpendUpdateTransactions(TypedDict):

@@ -2749,3 +2752,9 @@ class DBSpendUpdateTransactions(TypedDict):
team_list_transactions: Optional[Dict[str, float]]
team_member_list_transactions: Optional[Dict[str, float]]
org_list_transactions: Optional[Dict[str, float]]


class SpendUpdateQueueItem(TypedDict, total=False):
entity_type: Litellm_EntityType
entity_id: str
response_cost: Optional[float]
@@ -1,53 +0,0 @@
"""
Checks for LiteLLM service account keys

"""

from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth


def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
"""
Checks if the token is a service account

Returns:
bool: True if token is a service account

"""
if valid_token.metadata:
if "service_account_id" in valid_token.metadata:
return True
return False


async def service_account_checks(
valid_token: UserAPIKeyAuth, request_data: dict
) -> bool:
"""
If a virtual key is a service account, checks it's a valid service account

A token is a service account if it has a service_account_id in its metadata

Service Account Specific Checks:
- Check if required_params is set
"""

if check_if_token_is_service_account(valid_token) is not True:
return True

from litellm.proxy.proxy_server import general_settings

if "service_account_settings" in general_settings:
service_account_settings = general_settings["service_account_settings"]
if "enforced_params" in service_account_settings:
_enforced_params = service_account_settings["enforced_params"]
for param in _enforced_params:
if param not in request_data:
raise ProxyException(
type=ProxyErrorTypes.bad_request_error.value,
code=400,
param=param,
message=f"BadRequest please pass param={param} in request body. This is a required param for service account",
)

return True
@@ -49,7 +49,6 @@ from litellm.proxy.auth.auth_utils import (
from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler
from litellm.proxy.auth.oauth2_check import check_oauth2_token
from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
from litellm.proxy.auth.service_account_checks import service_account_checks
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.utils import PrismaClient, ProxyLogging
from litellm.types.services import ServiceTypes

@@ -905,12 +904,6 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
else:
_team_obj = None

# Check 7: Check if key is a service account key
await service_account_checks(
valid_token=valid_token,
request_data=request_data,
)

user_api_key_cache.set_cache(
key=valid_token.team_id, value=_team_obj
) # save team table in cache - used for tpm/rpm limiting - tpm_rpm_limiter.py
@@ -123,6 +123,7 @@ class ProxyBaseLLMRequestProcessing:
"""
Common request processing logic for both chat completions and responses API endpoints
"""

verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(self.data, indent=4)),
)
@@ -81,8 +81,13 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
if request is None:
return None
if hasattr(request, "scope") and "parsed_body" in request.scope:
return request.scope["parsed_body"]
if (
hasattr(request, "scope")
and "parsed_body" in request.scope
and isinstance(request.scope["parsed_body"], tuple)
):
accepted_keys, parsed_body = request.scope["parsed_body"]
return {key: parsed_body[key] for key in accepted_keys}
return None


@@ -93,7 +98,7 @@ def _safe_set_request_parsed_body(
try:
if request is None:
return
request.scope["parsed_body"] = parsed_body
request.scope["parsed_body"] = (tuple(parsed_body.keys()), parsed_body)
except Exception as e:
verbose_proxy_logger.debug(
"Unexpected error setting request parsed body - {}".format(e)
@ -10,7 +10,7 @@ import os
|
|||
import time
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
|
@ -18,13 +18,19 @@ from litellm.caching import DualCache, RedisCache
from litellm.constants import DB_SPEND_UPDATE_JOB_NAME
from litellm.proxy._types import (
    DB_CONNECTION_ERROR_TYPES,
    DailyUserSpendTransaction,
    DBSpendUpdateTransactions,
    Litellm_EntityType,
    LiteLLM_UserTable,
    SpendLogsPayload,
    SpendUpdateQueueItem,
)
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.redis_update_buffer import RedisUpdateBuffer
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
    DailySpendUpdateQueue,
)
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue

if TYPE_CHECKING:
    from litellm.proxy.utils import PrismaClient, ProxyLogging

@ -48,10 +54,12 @@ class DBSpendUpdateWriter:
        self.redis_cache = redis_cache
        self.redis_update_buffer = RedisUpdateBuffer(redis_cache=self.redis_cache)
        self.pod_lock_manager = PodLockManager(cronjob_id=DB_SPEND_UPDATE_JOB_NAME)
        self.spend_update_queue = SpendUpdateQueue()
        self.daily_spend_update_queue = DailySpendUpdateQueue()

    @staticmethod
    async def update_database(
        # LiteLLM management object fields
        self,
        token: Optional[str],
        user_id: Optional[str],
        end_user_id: Optional[str],
@ -84,7 +92,7 @@ class DBSpendUpdateWriter:
|
|||
hashed_token = token
|
||||
|
||||
asyncio.create_task(
|
||||
DBSpendUpdateWriter._update_user_db(
|
||||
self._update_user_db(
|
||||
response_cost=response_cost,
|
||||
user_id=user_id,
|
||||
prisma_client=prisma_client,
|
||||
|
@ -94,14 +102,14 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
)
|
||||
asyncio.create_task(
|
||||
DBSpendUpdateWriter._update_key_db(
|
||||
self._update_key_db(
|
||||
response_cost=response_cost,
|
||||
hashed_token=hashed_token,
|
||||
prisma_client=prisma_client,
|
||||
)
|
||||
)
|
||||
asyncio.create_task(
|
||||
DBSpendUpdateWriter._update_team_db(
|
||||
self._update_team_db(
|
||||
response_cost=response_cost,
|
||||
team_id=team_id,
|
||||
user_id=user_id,
|
||||
|
@ -109,14 +117,14 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
)
|
||||
asyncio.create_task(
|
||||
DBSpendUpdateWriter._update_org_db(
|
||||
self._update_org_db(
|
||||
response_cost=response_cost,
|
||||
org_id=org_id,
|
||||
prisma_client=prisma_client,
|
||||
)
|
||||
)
|
||||
if disable_spend_logs is False:
|
||||
await DBSpendUpdateWriter._insert_spend_log_to_db(
|
||||
await self._insert_spend_log_to_db(
|
||||
kwargs=kwargs,
|
||||
completion_response=completion_response,
|
||||
start_time=start_time,
|
||||
|
@ -135,56 +143,8 @@ class DBSpendUpdateWriter:
|
|||
f"Error updating Prisma database: {traceback.format_exc()}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def _update_transaction_list(
|
||||
response_cost: Optional[float],
|
||||
entity_id: Optional[str],
|
||||
transaction_list: dict,
|
||||
entity_type: Litellm_EntityType,
|
||||
debug_msg: Optional[str] = None,
|
||||
prisma_client: Optional[PrismaClient] = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Common helper method to update a transaction list for an entity
|
||||
|
||||
Args:
|
||||
response_cost: The cost to add
|
||||
entity_id: The ID of the entity to update
|
||||
transaction_list: The transaction list dictionary to update
|
||||
entity_type: The type of entity (from EntityType enum)
|
||||
debug_msg: Optional custom debug message
|
||||
|
||||
Returns:
|
||||
bool: True if update happened, False otherwise
|
||||
"""
|
||||
try:
|
||||
if debug_msg:
|
||||
verbose_proxy_logger.debug(debug_msg)
|
||||
else:
|
||||
verbose_proxy_logger.debug(
|
||||
f"adding spend to {entity_type.value} db. Response cost: {response_cost}. {entity_type.value}_id: {entity_id}."
|
||||
)
|
||||
if prisma_client is None:
|
||||
return False
|
||||
|
||||
if entity_id is None:
|
||||
verbose_proxy_logger.debug(
|
||||
f"track_cost_callback: {entity_type.value}_id is None. Not tracking spend for {entity_type.value}"
|
||||
)
|
||||
return False
|
||||
transaction_list[entity_id] = response_cost + transaction_list.get(
|
||||
entity_id, 0
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.info(
|
||||
f"Update {entity_type.value.capitalize()} DB failed to execute - {str(e)}\n{traceback.format_exc()}"
|
||||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
async def _update_key_db(
|
||||
self,
|
||||
response_cost: Optional[float],
|
||||
hashed_token: Optional[str],
|
||||
prisma_client: Optional[PrismaClient],
|
||||
|
@ -193,13 +153,12 @@ class DBSpendUpdateWriter:
|
|||
if hashed_token is None or prisma_client is None:
|
||||
return
|
||||
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=hashed_token,
|
||||
transaction_list=prisma_client.key_list_transactions,
|
||||
entity_type=Litellm_EntityType.KEY,
|
||||
debug_msg=f"adding spend to key db. Response cost: {response_cost}. Token: {hashed_token}.",
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.KEY,
|
||||
entity_id=hashed_token,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.exception(
|
||||
|
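The hunks above replace direct mutation of the `prisma_client.*_list_transactions` dicts with per-entity queue items. A small sketch of the new flow, assuming the `SpendUpdateQueue`, `SpendUpdateQueueItem`, and `Litellm_EntityType` names from this diff are importable as shown; the ids and costs are made up:
```
import asyncio

from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue


async def main():
    queue = SpendUpdateQueue()
    # each tracked entity gets its own queue item instead of mutating prisma_client dicts
    await queue.add_update(
        update=SpendUpdateQueueItem(
            entity_type=Litellm_EntityType.KEY,
            entity_id="hashed-token-123",
            response_cost=0.002,
        )
    )
    await queue.add_update(
        update=SpendUpdateQueueItem(
            entity_type=Litellm_EntityType.USER,
            entity_id="user-1",
            response_cost=0.002,
        )
    )
    # a background job later drains the queue into DBSpendUpdateTransactions
    transactions = await queue.flush_and_get_aggregated_db_spend_update_transactions()
    print(transactions["key_list_transactions"])  # {"hashed-token-123": 0.002}


asyncio.run(main())
```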
@ -207,8 +166,8 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
async def _update_user_db(
|
||||
self,
|
||||
response_cost: Optional[float],
|
||||
user_id: Optional[str],
|
||||
prisma_client: Optional[PrismaClient],
|
||||
|
@ -234,21 +193,21 @@ class DBSpendUpdateWriter:
|
|||
|
||||
for _id in user_ids:
|
||||
if _id is not None:
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=_id,
|
||||
transaction_list=prisma_client.user_list_transactions,
|
||||
entity_type=Litellm_EntityType.USER,
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.USER,
|
||||
entity_id=_id,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
|
||||
if end_user_id is not None:
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=end_user_id,
|
||||
transaction_list=prisma_client.end_user_list_transactions,
|
||||
entity_type=Litellm_EntityType.END_USER,
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.END_USER,
|
||||
entity_id=end_user_id,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.info(
|
||||
|
@ -256,8 +215,8 @@ class DBSpendUpdateWriter:
|
|||
+ f"Update User DB call failed to execute {str(e)}\n{traceback.format_exc()}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def _update_team_db(
|
||||
self,
|
||||
response_cost: Optional[float],
|
||||
team_id: Optional[str],
|
||||
user_id: Optional[str],
|
||||
|
@ -270,12 +229,12 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
return
|
||||
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=team_id,
|
||||
transaction_list=prisma_client.team_list_transactions,
|
||||
entity_type=Litellm_EntityType.TEAM,
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.TEAM,
|
||||
entity_id=team_id,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
|
@ -283,12 +242,12 @@ class DBSpendUpdateWriter:
|
|||
if user_id is not None:
|
||||
# key is "team_id::<value>::user_id::<value>"
|
||||
team_member_key = f"team_id::{team_id}::user_id::{user_id}"
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=team_member_key,
|
||||
transaction_list=prisma_client.team_member_list_transactions,
|
||||
entity_type=Litellm_EntityType.TEAM_MEMBER,
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.TEAM_MEMBER,
|
||||
entity_id=team_member_key,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
@ -298,8 +257,8 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
async def _update_org_db(
|
||||
self,
|
||||
response_cost: Optional[float],
|
||||
org_id: Optional[str],
|
||||
prisma_client: Optional[PrismaClient],
|
||||
|
@ -311,12 +270,12 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
return
|
||||
|
||||
await DBSpendUpdateWriter._update_transaction_list(
|
||||
response_cost=response_cost,
|
||||
entity_id=org_id,
|
||||
transaction_list=prisma_client.org_list_transactions,
|
||||
entity_type=Litellm_EntityType.ORGANIZATION,
|
||||
prisma_client=prisma_client,
|
||||
await self.spend_update_queue.add_update(
|
||||
update=SpendUpdateQueueItem(
|
||||
entity_type=Litellm_EntityType.ORGANIZATION,
|
||||
entity_id=org_id,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.info(
|
||||
|
@ -324,8 +283,8 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
async def _insert_spend_log_to_db(
|
||||
self,
|
||||
kwargs: Optional[dict],
|
||||
completion_response: Optional[Union[litellm.ModelResponse, Any, Exception]],
|
||||
start_time: Optional[datetime],
|
||||
|
@ -346,7 +305,7 @@ class DBSpendUpdateWriter:
|
|||
end_time=end_time,
|
||||
)
|
||||
payload["spend"] = response_cost or 0.0
|
||||
DBSpendUpdateWriter._set_spend_logs_payload(
|
||||
await self._set_spend_logs_payload(
|
||||
payload=payload,
|
||||
spend_logs_url=os.getenv("SPEND_LOGS_URL"),
|
||||
prisma_client=prisma_client,
|
||||
|
@ -357,8 +316,8 @@ class DBSpendUpdateWriter:
|
|||
)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
def _set_spend_logs_payload(
|
||||
async def _set_spend_logs_payload(
|
||||
self,
|
||||
payload: Union[dict, SpendLogsPayload],
|
||||
prisma_client: PrismaClient,
|
||||
spend_logs_url: Optional[str] = None,
|
||||
|
@ -377,8 +336,9 @@ class DBSpendUpdateWriter:
|
|||
elif prisma_client is not None:
|
||||
prisma_client.spend_log_transactions.append(payload)
|
||||
|
||||
prisma_client.add_spend_log_transaction_to_daily_user_transaction(
|
||||
payload.copy()
|
||||
await self.add_spend_log_transaction_to_daily_user_transaction(
|
||||
payload=payload.copy(),
|
||||
prisma_client=prisma_client,
|
||||
)
|
||||
return prisma_client
|
||||
|
||||
|
@ -435,7 +395,8 @@ class DBSpendUpdateWriter:
|
|||
- Only 1 pod will commit to db at a time (based on if it can acquire the lock over writing to DB)
|
||||
"""
|
||||
await self.redis_update_buffer.store_in_memory_spend_updates_in_redis(
|
||||
prisma_client=prisma_client,
|
||||
spend_update_queue=self.spend_update_queue,
|
||||
daily_spend_update_queue=self.daily_spend_update_queue,
|
||||
)
|
||||
|
||||
# Only commit from redis to db if this pod is the leader
|
||||
|
@ -447,12 +408,23 @@ class DBSpendUpdateWriter:
|
|||
await self.redis_update_buffer.get_all_update_transactions_from_redis_buffer()
|
||||
)
|
||||
if db_spend_update_transactions is not None:
|
||||
await DBSpendUpdateWriter._commit_spend_updates_to_db(
|
||||
await self._commit_spend_updates_to_db(
|
||||
prisma_client=prisma_client,
|
||||
n_retry_times=n_retry_times,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
db_spend_update_transactions=db_spend_update_transactions,
|
||||
)
|
||||
|
||||
daily_spend_update_transactions = (
|
||||
await self.redis_update_buffer.get_all_daily_spend_update_transactions_from_redis_buffer()
|
||||
)
|
||||
if daily_spend_update_transactions is not None:
|
||||
await DBSpendUpdateWriter.update_daily_user_spend(
|
||||
n_retry_times=n_retry_times,
|
||||
prisma_client=prisma_client,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
daily_spend_transactions=daily_spend_update_transactions,
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(f"Error committing spend updates: {e}")
|
||||
finally:
|
||||
|
@ -471,23 +443,34 @@ class DBSpendUpdateWriter:
|
|||
|
||||
Note: This flow causes Deadlocks in production (1K RPS+). Use self._commit_spend_updates_to_db_with_redis() instead if you expect 1K+ RPS.
|
||||
"""
|
||||
db_spend_update_transactions = DBSpendUpdateTransactions(
|
||||
user_list_transactions=prisma_client.user_list_transactions,
|
||||
end_user_list_transactions=prisma_client.end_user_list_transactions,
|
||||
key_list_transactions=prisma_client.key_list_transactions,
|
||||
team_list_transactions=prisma_client.team_list_transactions,
|
||||
team_member_list_transactions=prisma_client.team_member_list_transactions,
|
||||
org_list_transactions=prisma_client.org_list_transactions,
|
||||
|
||||
# Aggregate all in memory spend updates (key, user, end_user, team, team_member, org) and commit to db
|
||||
################## Spend Update Transactions ##################
|
||||
db_spend_update_transactions = (
|
||||
await self.spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
|
||||
)
|
||||
await DBSpendUpdateWriter._commit_spend_updates_to_db(
|
||||
await self._commit_spend_updates_to_db(
|
||||
prisma_client=prisma_client,
|
||||
n_retry_times=n_retry_times,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
db_spend_update_transactions=db_spend_update_transactions,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
################## Daily Spend Update Transactions ##################
|
||||
# Aggregate all in memory daily spend transactions and commit to db
|
||||
daily_spend_update_transactions = (
|
||||
await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
|
||||
)
|
||||
|
||||
await DBSpendUpdateWriter.update_daily_user_spend(
|
||||
n_retry_times=n_retry_times,
|
||||
prisma_client=prisma_client,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
daily_spend_transactions=daily_spend_update_transactions,
|
||||
)
|
||||
|
||||
async def _commit_spend_updates_to_db( # noqa: PLR0915
|
||||
self,
|
||||
prisma_client: PrismaClient,
|
||||
n_retry_times: int,
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
|
@ -526,9 +509,6 @@ class DBSpendUpdateWriter:
|
|||
where={"user_id": user_id},
|
||||
data={"spend": {"increment": response_cost}},
|
||||
)
|
||||
prisma_client.user_list_transactions = (
|
||||
{}
|
||||
) # Clear the remaining transactions after processing all batches in the loop.
|
||||
break
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if (
|
||||
|
@ -561,6 +541,7 @@ class DBSpendUpdateWriter:
|
|||
n_retry_times=n_retry_times,
|
||||
prisma_client=prisma_client,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
end_user_list_transactions=end_user_list_transactions,
|
||||
)
|
||||
### UPDATE KEY TABLE ###
|
||||
key_list_transactions = db_spend_update_transactions["key_list_transactions"]
|
||||
|
@ -583,9 +564,6 @@ class DBSpendUpdateWriter:
|
|||
where={"token": token},
|
||||
data={"spend": {"increment": response_cost}},
|
||||
)
|
||||
prisma_client.key_list_transactions = (
|
||||
{}
|
||||
) # Clear the remaining transactions after processing all batches in the loop.
|
||||
break
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if (
|
||||
|
@ -632,9 +610,6 @@ class DBSpendUpdateWriter:
|
|||
where={"team_id": team_id},
|
||||
data={"spend": {"increment": response_cost}},
|
||||
)
|
||||
prisma_client.team_list_transactions = (
|
||||
{}
|
||||
) # Clear the remaining transactions after processing all batches in the loop.
|
||||
break
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if (
|
||||
|
@ -684,9 +659,6 @@ class DBSpendUpdateWriter:
|
|||
where={"team_id": team_id, "user_id": user_id},
|
||||
data={"spend": {"increment": response_cost}},
|
||||
)
|
||||
prisma_client.team_member_list_transactions = (
|
||||
{}
|
||||
) # Clear the remaining transactions after processing all batches in the loop.
|
||||
break
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if (
|
||||
|
@ -725,9 +697,6 @@ class DBSpendUpdateWriter:
|
|||
where={"organization_id": org_id},
|
||||
data={"spend": {"increment": response_cost}},
|
||||
)
|
||||
prisma_client.org_list_transactions = (
|
||||
{}
|
||||
) # Clear the remaining transactions after processing all batches in the loop.
|
||||
break
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if (
|
||||
|
@ -744,3 +713,192 @@ class DBSpendUpdateWriter:
|
|||
_raise_failed_update_spend_exception(
|
||||
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_daily_user_spend(
|
||||
n_retry_times: int,
|
||||
prisma_client: PrismaClient,
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
daily_spend_transactions: Dict[str, DailyUserSpendTransaction],
|
||||
):
|
||||
"""
|
||||
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
|
||||
"""
|
||||
from litellm.proxy.utils import _raise_failed_update_spend_exception
|
||||
|
||||
### UPDATE DAILY USER SPEND ###
|
||||
verbose_proxy_logger.debug(
|
||||
"Daily User Spend transactions: {}".format(len(daily_spend_transactions))
|
||||
)
|
||||
BATCH_SIZE = (
|
||||
100 # Number of aggregated records to update in each database operation
|
||||
)
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
for i in range(n_retry_times + 1):
|
||||
try:
|
||||
# Get transactions to process
|
||||
transactions_to_process = dict(
|
||||
list(daily_spend_transactions.items())[:BATCH_SIZE]
|
||||
)
|
||||
|
||||
if len(transactions_to_process) == 0:
|
||||
verbose_proxy_logger.debug(
|
||||
"No new transactions to process for daily spend update"
|
||||
)
|
||||
break
|
||||
|
||||
# Update DailyUserSpend table in batches
|
||||
async with prisma_client.db.batch_() as batcher:
|
||||
for _, transaction in transactions_to_process.items():
|
||||
user_id = transaction.get("user_id")
|
||||
if not user_id: # Skip if no user_id
|
||||
continue
|
||||
|
||||
batcher.litellm_dailyuserspend.upsert(
|
||||
where={
|
||||
"user_id_date_api_key_model_custom_llm_provider": {
|
||||
"user_id": user_id,
|
||||
"date": transaction["date"],
|
||||
"api_key": transaction["api_key"],
|
||||
"model": transaction["model"],
|
||||
"custom_llm_provider": transaction.get(
|
||||
"custom_llm_provider"
|
||||
),
|
||||
}
|
||||
},
|
||||
data={
|
||||
"create": {
|
||||
"user_id": user_id,
|
||||
"date": transaction["date"],
|
||||
"api_key": transaction["api_key"],
|
||||
"model": transaction["model"],
|
||||
"model_group": transaction.get("model_group"),
|
||||
"custom_llm_provider": transaction.get(
|
||||
"custom_llm_provider"
|
||||
),
|
||||
"prompt_tokens": transaction["prompt_tokens"],
|
||||
"completion_tokens": transaction[
|
||||
"completion_tokens"
|
||||
],
|
||||
"spend": transaction["spend"],
|
||||
"api_requests": transaction["api_requests"],
|
||||
"successful_requests": transaction[
|
||||
"successful_requests"
|
||||
],
|
||||
"failed_requests": transaction[
|
||||
"failed_requests"
|
||||
],
|
||||
},
|
||||
"update": {
|
||||
"prompt_tokens": {
|
||||
"increment": transaction["prompt_tokens"]
|
||||
},
|
||||
"completion_tokens": {
|
||||
"increment": transaction[
|
||||
"completion_tokens"
|
||||
]
|
||||
},
|
||||
"spend": {"increment": transaction["spend"]},
|
||||
"api_requests": {
|
||||
"increment": transaction["api_requests"]
|
||||
},
|
||||
"successful_requests": {
|
||||
"increment": transaction[
|
||||
"successful_requests"
|
||||
]
|
||||
},
|
||||
"failed_requests": {
|
||||
"increment": transaction["failed_requests"]
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
verbose_proxy_logger.info(
|
||||
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
|
||||
)
|
||||
|
||||
# Remove processed transactions
|
||||
for key in transactions_to_process.keys():
|
||||
daily_spend_transactions.pop(key, None)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
|
||||
)
|
||||
break
|
||||
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if i >= n_retry_times:
|
||||
_raise_failed_update_spend_exception(
|
||||
e=e,
|
||||
start_time=start_time,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
)
|
||||
await asyncio.sleep(2**i) # Exponential backoff
|
||||
|
||||
except Exception as e:
|
||||
# Remove processed transactions even if there was an error
|
||||
if "transactions_to_process" in locals():
|
||||
for key in transactions_to_process.keys(): # type: ignore
|
||||
daily_spend_transactions.pop(key, None)
|
||||
_raise_failed_update_spend_exception(
|
||||
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
|
||||
)
|
||||
|
||||
async def add_spend_log_transaction_to_daily_user_transaction(
|
||||
self,
|
||||
payload: Union[dict, SpendLogsPayload],
|
||||
prisma_client: PrismaClient,
|
||||
):
|
||||
"""
|
||||
Add a spend log transaction to the `daily_spend_update_queue`
|
||||
|
||||
Key = @@unique([user_id, date, api_key, model, custom_llm_provider]) )
|
||||
|
||||
If key exists, update the transaction with the new spend and usage
|
||||
"""
|
||||
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
|
||||
|
||||
if not all(key in payload for key in expected_keys):
|
||||
verbose_proxy_logger.debug(
|
||||
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
|
||||
)
|
||||
return
|
||||
|
||||
request_status = prisma_client.get_request_status(payload)
|
||||
verbose_proxy_logger.info(f"Logged request status: {request_status}")
|
||||
if isinstance(payload["startTime"], datetime):
|
||||
start_time = payload["startTime"].isoformat()
|
||||
date = start_time.split("T")[0]
|
||||
elif isinstance(payload["startTime"], str):
|
||||
date = payload["startTime"].split("T")[0]
|
||||
else:
|
||||
verbose_proxy_logger.debug(
|
||||
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
|
||||
)
|
||||
return
|
||||
try:
|
||||
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
|
||||
daily_transaction = DailyUserSpendTransaction(
|
||||
user_id=payload["user"],
|
||||
date=date,
|
||||
api_key=payload["api_key"],
|
||||
model=payload["model"],
|
||||
model_group=payload["model_group"],
|
||||
custom_llm_provider=payload["custom_llm_provider"],
|
||||
prompt_tokens=payload["prompt_tokens"],
|
||||
completion_tokens=payload["completion_tokens"],
|
||||
spend=payload["spend"],
|
||||
api_requests=1,
|
||||
successful_requests=1 if request_status == "success" else 0,
|
||||
failed_requests=1 if request_status != "success" else 0,
|
||||
)
|
||||
|
||||
await self.daily_spend_update_queue.add_update(
|
||||
update={daily_transaction_key: daily_transaction}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
|
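`add_spend_log_transaction_to_daily_user_transaction` above keys each daily aggregate by user, date, hashed key, model, and provider. A quick sketch of how that key is derived from a spend-log payload (all values here are made up):
```
from datetime import datetime

payload = {
    "user": "user-1",
    "startTime": datetime(2025, 4, 1, 12, 30),
    "api_key": "88dc28aa",  # hashed token, made up
    "model": "gpt-4o",
    "custom_llm_provider": "openai",
}

start_time = payload["startTime"].isoformat()
date = start_time.split("T")[0]  # "2025-04-01"

daily_transaction_key = (
    f"{payload['user']}_{date}_{payload['api_key']}"
    f"_{payload['model']}_{payload['custom_llm_provider']}"
)
print(daily_transaction_key)
# user-1_2025-04-01_88dc28aa_gpt-4o_openai
```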
25  litellm/proxy/db/db_transaction_queue/base_update_queue.py  Normal file
@ -0,0 +1,25 @@
"""
Base class for in memory buffer for database transactions
"""
import asyncio

from litellm._logging import verbose_proxy_logger


class BaseUpdateQueue:
    """Base class for in memory buffer for database transactions"""

    def __init__(self):
        self.update_queue = asyncio.Queue()

    async def add_update(self, update):
        """Enqueue an update."""
        verbose_proxy_logger.debug("Adding update to queue: %s", update)
        await self.update_queue.put(update)

    async def flush_all_updates_from_in_memory_queue(self):
        """Get all updates from the queue."""
        updates = []
        while not self.update_queue.empty():
            updates.append(await self.update_queue.get())
        return updates
|
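A tiny usage sketch for the new base queue above (import path taken from this diff); the update payloads are arbitrary dicts here:
```
import asyncio

from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue


async def main():
    queue = BaseUpdateQueue()
    await queue.add_update({"entity_id": "k1", "response_cost": 0.01})
    await queue.add_update({"entity_id": "k1", "response_cost": 0.02})
    # draining returns the raw items in FIFO order; subclasses do the aggregation
    print(await queue.flush_all_updates_from_in_memory_queue())


asyncio.run(main())
```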
@ -0,0 +1,95 @@
|
|||
import asyncio
|
||||
from typing import Dict, List
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import DailyUserSpendTransaction
|
||||
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
|
||||
|
||||
|
||||
class DailySpendUpdateQueue(BaseUpdateQueue):
|
||||
"""
|
||||
In memory buffer for daily spend updates that should be committed to the database
|
||||
|
||||
To add a new daily spend update transaction, use the following format:
|
||||
daily_spend_update_queue.add_update({
|
||||
"user1_date_api_key_model_custom_llm_provider": {
|
||||
"spend": 10,
|
||||
"prompt_tokens": 100,
|
||||
"completion_tokens": 100,
|
||||
}
|
||||
})
|
||||
|
||||
Queue contains a list of daily spend update transactions
|
||||
|
||||
eg
|
||||
queue = [
|
||||
{
|
||||
"user1_date_api_key_model_custom_llm_provider": {
|
||||
"spend": 10,
|
||||
"prompt_tokens": 100,
|
||||
"completion_tokens": 100,
|
||||
"api_requests": 100,
|
||||
"successful_requests": 100,
|
||||
"failed_requests": 100,
|
||||
}
|
||||
},
|
||||
{
|
||||
"user2_date_api_key_model_custom_llm_provider": {
|
||||
"spend": 10,
|
||||
"prompt_tokens": 100,
|
||||
"completion_tokens": 100,
|
||||
"api_requests": 100,
|
||||
"successful_requests": 100,
|
||||
"failed_requests": 100,
|
||||
}
|
||||
}
|
||||
]
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.update_queue: asyncio.Queue[
|
||||
Dict[str, DailyUserSpendTransaction]
|
||||
] = asyncio.Queue()
|
||||
|
||||
async def flush_and_get_aggregated_daily_spend_update_transactions(
|
||||
self,
|
||||
) -> Dict[str, DailyUserSpendTransaction]:
|
||||
"""Get all updates from the queue and return all updates aggregated by daily_transaction_key."""
|
||||
updates = await self.flush_all_updates_from_in_memory_queue()
|
||||
aggregated_daily_spend_update_transactions = (
|
||||
DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
|
||||
updates
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
"Aggregated daily spend update transactions: %s",
|
||||
aggregated_daily_spend_update_transactions,
|
||||
)
|
||||
return aggregated_daily_spend_update_transactions
|
||||
|
||||
@staticmethod
|
||||
def get_aggregated_daily_spend_update_transactions(
|
||||
updates: List[Dict[str, DailyUserSpendTransaction]]
|
||||
) -> Dict[str, DailyUserSpendTransaction]:
|
||||
"""Aggregate updates by daily_transaction_key."""
|
||||
aggregated_daily_spend_update_transactions: Dict[
|
||||
str, DailyUserSpendTransaction
|
||||
] = {}
|
||||
for _update in updates:
|
||||
for _key, payload in _update.items():
|
||||
if _key in aggregated_daily_spend_update_transactions:
|
||||
daily_transaction = aggregated_daily_spend_update_transactions[_key]
|
||||
daily_transaction["spend"] += payload["spend"]
|
||||
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
|
||||
daily_transaction["completion_tokens"] += payload[
|
||||
"completion_tokens"
|
||||
]
|
||||
daily_transaction["api_requests"] += payload["api_requests"]
|
||||
daily_transaction["successful_requests"] += payload[
|
||||
"successful_requests"
|
||||
]
|
||||
daily_transaction["failed_requests"] += payload["failed_requests"]
|
||||
else:
|
||||
aggregated_daily_spend_update_transactions[_key] = payload
|
||||
return aggregated_daily_spend_update_transactions
|
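`get_aggregated_daily_spend_update_transactions` above merges queue entries that share a daily key by summing their counters. A short sketch, assuming the class is importable at the path introduced in this diff; the two payloads are trimmed to the numeric fields for illustration:
```
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
    DailySpendUpdateQueue,
)

updates = [
    {"user1_2025-04-01_key_gpt-4o_openai": {
        "spend": 1.0, "prompt_tokens": 100, "completion_tokens": 20,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0}},
    {"user1_2025-04-01_key_gpt-4o_openai": {
        "spend": 2.5, "prompt_tokens": 50, "completion_tokens": 10,
        "api_requests": 1, "successful_requests": 0, "failed_requests": 1}},
]

merged = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(updates)
print(merged["user1_2025-04-01_key_gpt-4o_openai"]["spend"])  # 3.5
```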
|
@ -9,9 +9,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching import RedisCache
|
||||
from litellm.constants import MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_UPDATE_BUFFER_KEY
|
||||
from litellm.constants import (
|
||||
MAX_REDIS_BUFFER_DEQUEUE_COUNT,
|
||||
REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
|
||||
REDIS_UPDATE_BUFFER_KEY,
|
||||
)
|
||||
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
||||
from litellm.proxy._types import DBSpendUpdateTransactions
|
||||
from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions
|
||||
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
|
||||
DailySpendUpdateQueue,
|
||||
)
|
||||
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
|
||||
from litellm.secret_managers.main import str_to_bool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -54,37 +62,70 @@ class RedisUpdateBuffer:
|
|||
|
||||
async def store_in_memory_spend_updates_in_redis(
|
||||
self,
|
||||
prisma_client: PrismaClient,
|
||||
spend_update_queue: SpendUpdateQueue,
|
||||
daily_spend_update_queue: DailySpendUpdateQueue,
|
||||
):
|
||||
"""
|
||||
Stores the in-memory spend updates to Redis
|
||||
|
||||
Each transaction is a dict stored as following:
|
||||
- key is the entity id
|
||||
- value is the spend amount
|
||||
Stores the following in memory data structures in Redis:
|
||||
- SpendUpdateQueue - Key, User, Team, TeamMember, Org, EndUser Spend updates
|
||||
- DailySpendUpdateQueue - Daily Spend updates Aggregate view
|
||||
|
||||
```
|
||||
Redis List:
|
||||
key_list_transactions:
|
||||
[
|
||||
"0929880201": 1.2,
|
||||
"0929880202": 0.01,
|
||||
"0929880203": 0.001,
|
||||
]
|
||||
```
|
||||
For SpendUpdateQueue:
|
||||
Each transaction is a dict stored as following:
|
||||
- key is the entity id
|
||||
- value is the spend amount
|
||||
|
||||
```
|
||||
Redis List:
|
||||
key_list_transactions:
|
||||
[
|
||||
"0929880201": 1.2,
|
||||
"0929880202": 0.01,
|
||||
"0929880203": 0.001,
|
||||
]
|
||||
```
|
||||
|
||||
For DailySpendUpdateQueue:
|
||||
Each transaction is a Dict[str, DailyUserSpendTransaction] stored as following:
|
||||
- key is the daily_transaction_key
|
||||
- value is the DailyUserSpendTransaction
|
||||
|
||||
```
|
||||
Redis List:
|
||||
daily_spend_update_transactions:
|
||||
[
|
||||
{
|
||||
"user_keyhash_1_model_1": {
|
||||
"spend": 1.2,
|
||||
"prompt_tokens": 1000,
|
||||
"completion_tokens": 1000,
|
||||
"api_requests": 1000,
|
||||
"successful_requests": 1000,
|
||||
},
|
||||
|
||||
}
|
||||
]
|
||||
```
|
||||
"""
|
||||
if self.redis_cache is None:
|
||||
verbose_proxy_logger.debug(
|
||||
"redis_cache is None, skipping store_in_memory_spend_updates_in_redis"
|
||||
)
|
||||
return
|
||||
db_spend_update_transactions: DBSpendUpdateTransactions = DBSpendUpdateTransactions(
|
||||
user_list_transactions=prisma_client.user_list_transactions,
|
||||
end_user_list_transactions=prisma_client.end_user_list_transactions,
|
||||
key_list_transactions=prisma_client.key_list_transactions,
|
||||
team_list_transactions=prisma_client.team_list_transactions,
|
||||
team_member_list_transactions=prisma_client.team_member_list_transactions,
|
||||
org_list_transactions=prisma_client.org_list_transactions,
|
||||
|
||||
db_spend_update_transactions = (
|
||||
await spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
"ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
|
||||
)
|
||||
daily_spend_update_transactions = (
|
||||
await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
"ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions
|
||||
)
|
||||
|
||||
# only store in redis if there are any updates to commit
|
||||
|
@ -100,8 +141,13 @@ class RedisUpdateBuffer:
|
|||
values=list_of_transactions,
|
||||
)
|
||||
|
||||
# clear the in-memory spend updates
|
||||
RedisUpdateBuffer._clear_all_in_memory_spend_updates(prisma_client)
|
||||
list_of_daily_spend_update_transactions = [
|
||||
safe_dumps(daily_spend_update_transactions)
|
||||
]
|
||||
await self.redis_cache.async_rpush(
|
||||
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
|
||||
values=list_of_daily_spend_update_transactions,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _number_of_transactions_to_store_in_redis(
|
||||
|
@ -116,20 +162,6 @@ class RedisUpdateBuffer:
|
|||
num_transactions += len(v)
|
||||
return num_transactions
|
||||
|
||||
@staticmethod
|
||||
def _clear_all_in_memory_spend_updates(
|
||||
prisma_client: PrismaClient,
|
||||
):
|
||||
"""
|
||||
Clears all in-memory spend updates
|
||||
"""
|
||||
prisma_client.user_list_transactions = {}
|
||||
prisma_client.end_user_list_transactions = {}
|
||||
prisma_client.key_list_transactions = {}
|
||||
prisma_client.team_list_transactions = {}
|
||||
prisma_client.team_member_list_transactions = {}
|
||||
prisma_client.org_list_transactions = {}
|
||||
|
||||
@staticmethod
|
||||
def _remove_prefix_from_keys(data: Dict[str, Any], prefix: str) -> Dict[str, Any]:
|
||||
"""
|
||||
|
@ -197,6 +229,27 @@ class RedisUpdateBuffer:
|
|||
|
||||
return combined_transaction
|
||||
|
||||
async def get_all_daily_spend_update_transactions_from_redis_buffer(
|
||||
self,
|
||||
) -> Optional[Dict[str, DailyUserSpendTransaction]]:
|
||||
"""
|
||||
Gets all the daily spend update transactions from Redis
|
||||
"""
|
||||
if self.redis_cache is None:
|
||||
return None
|
||||
list_of_transactions = await self.redis_cache.async_lpop(
|
||||
key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
|
||||
count=MAX_REDIS_BUFFER_DEQUEUE_COUNT,
|
||||
)
|
||||
if list_of_transactions is None:
|
||||
return None
|
||||
list_of_daily_spend_update_transactions = [
|
||||
json.loads(transaction) for transaction in list_of_transactions
|
||||
]
|
||||
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
|
||||
list_of_daily_spend_update_transactions
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parse_list_of_transactions(
|
||||
list_of_transactions: Union[Any, List[Any]],
|
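The buffer methods above serialize each pod's aggregated transactions into a Redis list with `async_rpush`, and the leader pod later drains it with `async_lpop` and `json.loads`. A conceptual sketch of that round trip in which a plain Python list stands in for the Redis list, so it runs without a Redis server:
```
import json

REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "daily_spend_update_transactions"
fake_redis: dict = {REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY: []}

# producer side: a pod serializes its aggregated daily transactions and RPUSHes them
aggregated = {
    "user1_2025-04-01_key_gpt-4o_openai": {
        "spend": 1.0, "prompt_tokens": 10, "completion_tokens": 5,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0,
    }
}
fake_redis[REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY].append(json.dumps(aggregated))

# consumer side: the leader pod LPOPs a batch, decodes it, and re-aggregates by key
raw_batch = fake_redis[REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY][:100]
decoded = [json.loads(item) for item in raw_batch]
print(decoded[0])
```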
113  litellm/proxy/db/db_transaction_queue/spend_update_queue.py  Normal file
@ -0,0 +1,113 @@
|
|||
import asyncio
|
||||
from typing import List
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import (
|
||||
DBSpendUpdateTransactions,
|
||||
Litellm_EntityType,
|
||||
SpendUpdateQueueItem,
|
||||
)
|
||||
from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
|
||||
|
||||
|
||||
class SpendUpdateQueue(BaseUpdateQueue):
|
||||
"""
|
||||
In memory buffer for spend updates that should be committed to the database
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.update_queue: asyncio.Queue[SpendUpdateQueueItem] = asyncio.Queue()
|
||||
|
||||
async def flush_and_get_aggregated_db_spend_update_transactions(
|
||||
self,
|
||||
) -> DBSpendUpdateTransactions:
|
||||
"""Flush all updates from the queue and return all updates aggregated by entity type."""
|
||||
updates = await self.flush_all_updates_from_in_memory_queue()
|
||||
verbose_proxy_logger.debug("Aggregating updates by entity type: %s", updates)
|
||||
return self.get_aggregated_db_spend_update_transactions(updates)
|
||||
|
||||
def get_aggregated_db_spend_update_transactions(
|
||||
self, updates: List[SpendUpdateQueueItem]
|
||||
) -> DBSpendUpdateTransactions:
|
||||
"""Aggregate updates by entity type."""
|
||||
# Initialize all transaction lists as empty dicts
|
||||
db_spend_update_transactions = DBSpendUpdateTransactions(
|
||||
user_list_transactions={},
|
||||
end_user_list_transactions={},
|
||||
key_list_transactions={},
|
||||
team_list_transactions={},
|
||||
team_member_list_transactions={},
|
||||
org_list_transactions={},
|
||||
)
|
||||
|
||||
# Map entity types to their corresponding transaction dictionary keys
|
||||
entity_type_to_dict_key = {
|
||||
Litellm_EntityType.USER: "user_list_transactions",
|
||||
Litellm_EntityType.END_USER: "end_user_list_transactions",
|
||||
Litellm_EntityType.KEY: "key_list_transactions",
|
||||
Litellm_EntityType.TEAM: "team_list_transactions",
|
||||
Litellm_EntityType.TEAM_MEMBER: "team_member_list_transactions",
|
||||
Litellm_EntityType.ORGANIZATION: "org_list_transactions",
|
||||
}
|
||||
|
||||
for update in updates:
|
||||
entity_type = update.get("entity_type")
|
||||
entity_id = update.get("entity_id") or ""
|
||||
response_cost = update.get("response_cost") or 0
|
||||
|
||||
if entity_type is None:
|
||||
verbose_proxy_logger.debug(
|
||||
"Skipping update spend for update: %s, because entity_type is None",
|
||||
update,
|
||||
)
|
||||
continue
|
||||
|
||||
dict_key = entity_type_to_dict_key.get(entity_type)
|
||||
if dict_key is None:
|
||||
verbose_proxy_logger.debug(
|
||||
"Skipping update spend for update: %s, because entity_type is not in entity_type_to_dict_key",
|
||||
update,
|
||||
)
|
||||
continue # Skip unknown entity types
|
||||
|
||||
# Type-safe access using if/elif statements
|
||||
if dict_key == "user_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"user_list_transactions"
|
||||
]
|
||||
elif dict_key == "end_user_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"end_user_list_transactions"
|
||||
]
|
||||
elif dict_key == "key_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"key_list_transactions"
|
||||
]
|
||||
elif dict_key == "team_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"team_list_transactions"
|
||||
]
|
||||
elif dict_key == "team_member_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"team_member_list_transactions"
|
||||
]
|
||||
elif dict_key == "org_list_transactions":
|
||||
transactions_dict = db_spend_update_transactions[
|
||||
"org_list_transactions"
|
||||
]
|
||||
else:
|
||||
continue
|
||||
|
||||
if transactions_dict is None:
|
||||
transactions_dict = {}
|
||||
|
||||
# type ignore: dict_key is guaranteed to be one of "one of ("user_list_transactions", "end_user_list_transactions", "key_list_transactions", "team_list_transactions", "team_member_list_transactions", "org_list_transactions")"
|
||||
db_spend_update_transactions[dict_key] = transactions_dict # type: ignore
|
||||
|
||||
if entity_id not in transactions_dict:
|
||||
transactions_dict[entity_id] = 0
|
||||
|
||||
transactions_dict[entity_id] += response_cost or 0
|
||||
|
||||
return db_spend_update_transactions
|
15  litellm/proxy/example_config_yaml/spend_tracking_config.yaml  Normal file
@ -0,0 +1,15 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  use_redis_transaction_buffer: true

litellm_settings:
  cache: True
  cache_params:
    type: redis
    supported_call_types: []
|
@ -14,6 +14,7 @@ from pydantic import BaseModel
|
|||
from websockets.asyncio.client import ClientConnection, connect
|
||||
|
||||
from litellm import DualCache
|
||||
from litellm._version import version as litellm_version
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
|
@ -75,7 +76,9 @@ class AimGuardrail(CustomGuardrail):
|
|||
) -> Union[Exception, str, dict, None]:
|
||||
verbose_proxy_logger.debug("Inside AIM Pre-Call Hook")
|
||||
|
||||
await self.call_aim_guardrail(data, hook="pre_call")
|
||||
await self.call_aim_guardrail(
|
||||
data, hook="pre_call", key_alias=user_api_key_dict.key_alias
|
||||
)
|
||||
return data
|
||||
|
||||
async def async_moderation_hook(
|
||||
|
@ -93,15 +96,18 @@ class AimGuardrail(CustomGuardrail):
|
|||
) -> Union[Exception, str, dict, None]:
|
||||
verbose_proxy_logger.debug("Inside AIM Moderation Hook")
|
||||
|
||||
await self.call_aim_guardrail(data, hook="moderation")
|
||||
await self.call_aim_guardrail(
|
||||
data, hook="moderation", key_alias=user_api_key_dict.key_alias
|
||||
)
|
||||
return data
|
||||
|
||||
async def call_aim_guardrail(self, data: dict, hook: str) -> None:
|
||||
async def call_aim_guardrail(
|
||||
self, data: dict, hook: str, key_alias: Optional[str]
|
||||
) -> None:
|
||||
user_email = data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"x-aim-litellm-hook": hook,
|
||||
} | ({"x-aim-user-email": user_email} if user_email else {})
|
||||
headers = self._build_aim_headers(
|
||||
hook=hook, key_alias=key_alias, user_email=user_email
|
||||
)
|
||||
response = await self.async_handler.post(
|
||||
f"{self.api_base}/detect/openai",
|
||||
headers=headers,
|
||||
|
@ -120,18 +126,16 @@ class AimGuardrail(CustomGuardrail):
|
|||
raise HTTPException(status_code=400, detail=res["detection_message"])
|
||||
|
||||
async def call_aim_guardrail_on_output(
|
||||
self, request_data: dict, output: str, hook: str
|
||||
self, request_data: dict, output: str, hook: str, key_alias: Optional[str]
|
||||
) -> Optional[str]:
|
||||
user_email = (
|
||||
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
|
||||
)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"x-aim-litellm-hook": hook,
|
||||
} | ({"x-aim-user-email": user_email} if user_email else {})
|
||||
response = await self.async_handler.post(
|
||||
f"{self.api_base}/detect/output",
|
||||
headers=headers,
|
||||
headers=self._build_aim_headers(
|
||||
hook=hook, key_alias=key_alias, user_email=user_email
|
||||
),
|
||||
json={"output": output, "messages": request_data.get("messages", [])},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
@ -147,6 +151,32 @@ class AimGuardrail(CustomGuardrail):
|
|||
return res["detection_message"]
|
||||
return None
|
||||
|
||||
def _build_aim_headers(
|
||||
self, *, hook: str, key_alias: Optional[str], user_email: Optional[str]
|
||||
):
|
||||
"""
|
||||
A helper function to build the http headers that are required by AIM guardrails.
|
||||
"""
|
||||
return (
|
||||
{
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
# Used by Aim to apply only the guardrails that should be applied in a specific request phase.
|
||||
"x-aim-litellm-hook": hook,
|
||||
# Used by Aim to track LiteLLM version and provide backward compatibility.
|
||||
"x-aim-litellm-version": litellm_version,
|
||||
}
|
||||
# Used by Aim to track guardrails violations by user.
|
||||
| ({"x-aim-user-email": user_email} if user_email else {})
|
||||
| (
|
||||
{
|
||||
# Used by Aim apply only the guardrails that are associated with the key alias.
|
||||
"x-aim-litellm-key-alias": key_alias,
|
||||
}
|
||||
if key_alias
|
||||
else {}
|
||||
)
|
||||
)
|
||||
|
||||
async def async_post_call_success_hook(
|
||||
self,
|
||||
data: dict,
|
||||
|
@ -160,7 +190,7 @@ class AimGuardrail(CustomGuardrail):
|
|||
):
|
||||
content = response.choices[0].message.content or ""
|
||||
detection = await self.call_aim_guardrail_on_output(
|
||||
data, content, hook="output"
|
||||
data, content, hook="output", key_alias=user_api_key_dict.key_alias
|
||||
)
|
||||
if detection:
|
||||
raise HTTPException(status_code=400, detail=detection)
|
||||
|
@ -174,11 +204,13 @@ class AimGuardrail(CustomGuardrail):
|
|||
user_email = (
|
||||
request_data.get("metadata", {}).get("headers", {}).get("x-aim-user-email")
|
||||
)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
} | ({"x-aim-user-email": user_email} if user_email else {})
|
||||
async with connect(
|
||||
f"{self.ws_api_base}/detect/output/ws", additional_headers=headers
|
||||
f"{self.ws_api_base}/detect/output/ws",
|
||||
additional_headers=self._build_aim_headers(
|
||||
hook="output",
|
||||
key_alias=user_api_key_dict.key_alias,
|
||||
user_email=user_email,
|
||||
),
|
||||
) as websocket:
|
||||
sender = asyncio.create_task(
|
||||
self.forward_the_stream_to_aim(websocket, response)
|
||||
|
|
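`_build_aim_headers` above assembles the request headers with dict-union (`|`) expressions, adding the user email and key alias only when they are present. A standalone sketch of the same pattern (Python 3.9+); the key, alias, and version strings are placeholders, not values used by Aim:
```
from typing import Optional


def build_headers(
    api_key: str, hook: str, key_alias: Optional[str], user_email: Optional[str]
) -> dict:
    # base headers, then optional pieces merged in only when present
    return (
        {
            "Authorization": f"Bearer {api_key}",
            "x-aim-litellm-hook": hook,
            "x-aim-litellm-version": "1.0.0",  # placeholder for litellm_version
        }
        | ({"x-aim-user-email": user_email} if user_email else {})
        | ({"x-aim-litellm-key-alias": key_alias} if key_alias else {})
    )


print(build_headers("sk-aim", "pre_call", "team-a-key", None))
```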
|
@ -13,7 +13,6 @@ from litellm.litellm_core_utils.core_helpers import (
|
|||
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.auth.auth_checks import log_db_metrics
|
||||
from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
|
||||
from litellm.proxy.utils import ProxyUpdateSpend
|
||||
from litellm.types.utils import (
|
||||
StandardLoggingPayload,
|
||||
|
@ -37,6 +36,8 @@ class _ProxyDBLogger(CustomLogger):
|
|||
if _ProxyDBLogger._should_track_errors_in_db() is False:
|
||||
return
|
||||
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj
|
||||
|
||||
_metadata = dict(
|
||||
StandardLoggingUserAPIKeyMetadata(
|
||||
user_api_key_hash=user_api_key_dict.api_key,
|
||||
|
@ -66,7 +67,7 @@ class _ProxyDBLogger(CustomLogger):
|
|||
request_data.get("proxy_server_request") or {}
|
||||
)
|
||||
request_data["litellm_params"]["metadata"] = existing_metadata
|
||||
await DBSpendUpdateWriter.update_database(
|
||||
await proxy_logging_obj.db_spend_update_writer.update_database(
|
||||
token=user_api_key_dict.api_key,
|
||||
response_cost=0.0,
|
||||
user_id=user_api_key_dict.user_id,
|
||||
|
@ -136,7 +137,7 @@ class _ProxyDBLogger(CustomLogger):
|
|||
end_user_id=end_user_id,
|
||||
):
|
||||
## UPDATE DATABASE
|
||||
await DBSpendUpdateWriter.update_database(
|
||||
await proxy_logging_obj.db_spend_update_writer.update_database(
|
||||
token=user_api_key,
|
||||
response_cost=response_cost,
|
||||
user_id=user_id,
|
||||
|
|
|
@ -747,7 +747,10 @@ def _get_enforced_params(
    enforced_params: Optional[list] = None
    if general_settings is not None:
        enforced_params = general_settings.get("enforced_params")
        if "service_account_settings" in general_settings:
        if (
            "service_account_settings" in general_settings
            and check_if_token_is_service_account(user_api_key_dict) is True
        ):
            service_account_settings = general_settings["service_account_settings"]
            if "enforced_params" in service_account_settings:
                if enforced_params is None:

@ -760,6 +763,20 @@
    return enforced_params


def check_if_token_is_service_account(valid_token: UserAPIKeyAuth) -> bool:
    """
    Checks if the token is a service account

    Returns:
        bool: True if token is a service account

    """
    if valid_token.metadata:
        if "service_account_id" in valid_token.metadata:
            return True
    return False


def _enforced_params_check(
    request_body: dict,
    general_settings: Optional[dict],
|
|
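`check_if_token_is_service_account` above keys off a `service_account_id` entry in the token metadata. A minimal sketch of how a caller might use it, assuming `UserAPIKeyAuth` accepts `api_key` and `metadata` keyword arguments as this diff suggests; the id values are made up:
```
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import check_if_token_is_service_account


# a key whose metadata marks it as a service account
token = UserAPIKeyAuth(api_key="sk-hashed", metadata={"service_account_id": "svc-123"})
print(check_if_token_is_service_account(token))  # True

# a regular key without that metadata entry
regular = UserAPIKeyAuth(api_key="sk-hashed-2", metadata={})
print(check_if_token_is_service_account(regular))  # False
```
Note: the import path for the helper is an assumption; the diff only shows the function body, not which proxy module it lives in.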
|
@ -1259,19 +1259,43 @@ class SpendMetrics(BaseModel):
    prompt_tokens: int = Field(default=0)
    completion_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    successful_requests: int = Field(default=0)
    failed_requests: int = Field(default=0)
    api_requests: int = Field(default=0)


class MetricBase(BaseModel):
    metrics: SpendMetrics


class MetricWithMetadata(MetricBase):
    metadata: Dict[str, Any] = Field(default_factory=dict)


class KeyMetadata(BaseModel):
    """Metadata for a key"""

    key_alias: Optional[str] = None


class KeyMetricWithMetadata(MetricBase):
    """Base class for metrics with additional metadata"""

    metadata: KeyMetadata = Field(default_factory=KeyMetadata)


class BreakdownMetrics(BaseModel):
    """Breakdown of spend by different dimensions"""

    models: Dict[str, SpendMetrics] = Field(default_factory=dict) # model -> metrics
    providers: Dict[str, SpendMetrics] = Field(
    models: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    ) # provider -> metrics
    api_keys: Dict[str, SpendMetrics] = Field(
    ) # model -> {metrics, metadata}
    providers: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    ) # api_key -> metrics
    ) # provider -> {metrics, metadata}
    api_keys: Dict[str, KeyMetricWithMetadata] = Field(
        default_factory=dict
    ) # api_key -> {metrics, metadata}


class DailySpendData(BaseModel):

|
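The models above attach metadata (for example a key alias) next to each metrics bucket in the daily-activity breakdown. A self-contained pydantic sketch mirroring the new shape; the classes are re-declared locally for illustration rather than imported from the proxy:
```
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field


class SpendMetrics(BaseModel):
    spend: float = 0.0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    api_requests: int = 0


class MetricWithMetadata(BaseModel):
    metrics: SpendMetrics
    metadata: Dict[str, Any] = Field(default_factory=dict)


class KeyMetadata(BaseModel):
    key_alias: Optional[str] = None


class KeyMetricWithMetadata(BaseModel):
    metrics: SpendMetrics
    metadata: KeyMetadata = Field(default_factory=KeyMetadata)


# per-key bucket now carries the key alias alongside its metrics
entry = KeyMetricWithMetadata(
    metrics=SpendMetrics(spend=1.25, api_requests=3, successful_requests=3),
    metadata=KeyMetadata(key_alias="prod-key"),
)
print(entry.metadata.key_alias)  # prod-key
```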
@ -1284,7 +1308,10 @@ class DailySpendMetadata(BaseModel):
|
|||
total_spend: float = Field(default=0.0)
|
||||
total_prompt_tokens: int = Field(default=0)
|
||||
total_completion_tokens: int = Field(default=0)
|
||||
total_tokens: int = Field(default=0)
|
||||
total_api_requests: int = Field(default=0)
|
||||
total_successful_requests: int = Field(default=0)
|
||||
total_failed_requests: int = Field(default=0)
|
||||
page: int = Field(default=1)
|
||||
total_pages: int = Field(default=1)
|
||||
has_more: bool = Field(default=False)
|
||||
|
@ -1307,6 +1334,8 @@ class LiteLLM_DailyUserSpend(BaseModel):
|
|||
completion_tokens: int = 0
|
||||
spend: float = 0.0
|
||||
api_requests: int = 0
|
||||
successful_requests: int = 0
|
||||
failed_requests: int = 0
|
||||
|
||||
|
||||
class GroupedData(TypedDict):
|
||||
|
@ -1322,34 +1351,57 @@ def update_metrics(
|
|||
group_metrics.completion_tokens += record.completion_tokens
|
||||
group_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
|
||||
group_metrics.api_requests += record.api_requests
|
||||
group_metrics.successful_requests += record.successful_requests
|
||||
group_metrics.failed_requests += record.failed_requests
|
||||
return group_metrics
|
||||
|
||||
|
||||
def update_breakdown_metrics(
|
||||
breakdown: BreakdownMetrics, record: LiteLLM_DailyUserSpend
|
||||
breakdown: BreakdownMetrics,
|
||||
record: LiteLLM_DailyUserSpend,
|
||||
model_metadata: Dict[str, Dict[str, Any]],
|
||||
provider_metadata: Dict[str, Dict[str, Any]],
|
||||
api_key_metadata: Dict[str, Dict[str, Any]],
|
||||
) -> BreakdownMetrics:
|
||||
"""Updates breakdown metrics for a single record using the existing update_metrics function"""
|
||||
|
||||
# Update model breakdown
|
||||
if record.model not in breakdown.models:
|
||||
breakdown.models[record.model] = SpendMetrics()
|
||||
breakdown.models[record.model] = update_metrics(
|
||||
breakdown.models[record.model], record
|
||||
breakdown.models[record.model] = MetricWithMetadata(
|
||||
metrics=SpendMetrics(),
|
||||
metadata=model_metadata.get(
|
||||
record.model, {}
|
||||
), # Add any model-specific metadata here
|
||||
)
|
||||
breakdown.models[record.model].metrics = update_metrics(
|
||||
breakdown.models[record.model].metrics, record
|
||||
)
|
||||
|
||||
# Update provider breakdown
|
||||
provider = record.custom_llm_provider or "unknown"
|
||||
if provider not in breakdown.providers:
|
||||
breakdown.providers[provider] = SpendMetrics()
|
||||
breakdown.providers[provider] = update_metrics(
|
||||
breakdown.providers[provider], record
|
||||
breakdown.providers[provider] = MetricWithMetadata(
|
||||
metrics=SpendMetrics(),
|
||||
metadata=provider_metadata.get(
|
||||
provider, {}
|
||||
), # Add any provider-specific metadata here
|
||||
)
|
||||
breakdown.providers[provider].metrics = update_metrics(
|
||||
breakdown.providers[provider].metrics, record
|
||||
)
|
||||
|
||||
# Update api key breakdown
|
||||
if record.api_key not in breakdown.api_keys:
|
||||
breakdown.api_keys[record.api_key] = SpendMetrics()
|
||||
breakdown.api_keys[record.api_key] = update_metrics(
|
||||
breakdown.api_keys[record.api_key], record
|
||||
breakdown.api_keys[record.api_key] = KeyMetricWithMetadata(
|
||||
metrics=SpendMetrics(),
|
||||
metadata=KeyMetadata(
|
||||
key_alias=api_key_metadata.get(record.api_key, {}).get(
|
||||
"key_alias", None
|
||||
)
|
||||
), # Add any api_key-specific metadata here
|
||||
)
|
||||
breakdown.api_keys[record.api_key].metrics = update_metrics(
|
||||
breakdown.api_keys[record.api_key].metrics, record
|
||||
)
|
||||
|
||||
return breakdown
|
||||
|
@ -1428,6 +1480,14 @@ async def get_user_daily_activity(
|
|||
if api_key:
|
||||
where_conditions["api_key"] = api_key
|
||||
|
||||
if (
|
||||
user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
|
||||
and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
|
||||
):
|
||||
where_conditions[
|
||||
"user_id"
|
||||
] = user_api_key_dict.user_id # only allow access to own data
|
||||
|
||||
# Get total count for pagination
|
||||
total_count = await prisma_client.db.litellm_dailyuserspend.count(
|
||||
where=where_conditions
|
||||
|
@ -1443,6 +1503,28 @@ async def get_user_daily_activity(
|
|||
take=page_size,
|
||||
)
|
||||
|
||||
daily_spend_data_pydantic_list = [
|
||||
LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
|
||||
]
|
||||
|
||||
# Get all unique API keys from the spend data
|
||||
api_keys = set()
|
||||
for record in daily_spend_data_pydantic_list:
|
||||
if record.api_key:
|
||||
api_keys.add(record.api_key)
|
||||
|
||||
# Fetch key aliases in bulk
|
||||
|
||||
api_key_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
model_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
provider_metadata: Dict[str, Dict[str, Any]] = {}
|
||||
if api_keys:
|
||||
key_records = await prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where={"token": {"in": list(api_keys)}}
|
||||
)
|
||||
api_key_metadata.update(
|
||||
{k.token: {"key_alias": k.key_alias} for k in key_records}
|
||||
)
|
||||
# Process results
|
||||
results = []
|
||||
total_metrics = SpendMetrics()
|
||||
|
@ -1450,7 +1532,7 @@ async def get_user_daily_activity(
|
|||
# Group data by date and other dimensions
|
||||
|
||||
grouped_data: Dict[str, Dict[str, Any]] = {}
|
||||
for record in daily_spend_data:
|
||||
for record in daily_spend_data_pydantic_list:
|
||||
date_str = record.date
|
||||
if date_str not in grouped_data:
|
||||
grouped_data[date_str] = {
|
||||
|
@ -1464,7 +1546,11 @@ async def get_user_daily_activity(
|
|||
)
|
||||
# Update breakdowns
|
||||
grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
|
||||
grouped_data[date_str]["breakdown"], record
|
||||
grouped_data[date_str]["breakdown"],
|
||||
record,
|
||||
model_metadata,
|
||||
provider_metadata,
|
||||
api_key_metadata,
|
||||
)
|
||||
|
||||
# Update total metrics
|
||||
|
@ -1474,7 +1560,9 @@ async def get_user_daily_activity(
|
|||
total_metrics.total_tokens += (
|
||||
record.prompt_tokens + record.completion_tokens
|
||||
)
|
||||
total_metrics.api_requests += 1
|
||||
total_metrics.api_requests += record.api_requests
|
||||
total_metrics.successful_requests += record.successful_requests
|
||||
total_metrics.failed_requests += record.failed_requests
|
||||
|
||||
# Convert grouped data to response format
|
||||
for date_str, data in grouped_data.items():
|
||||
|
@ -1495,7 +1583,10 @@ async def get_user_daily_activity(
|
|||
total_spend=total_metrics.spend,
|
||||
total_prompt_tokens=total_metrics.prompt_tokens,
|
||||
total_completion_tokens=total_metrics.completion_tokens,
|
||||
total_tokens=total_metrics.total_tokens,
|
||||
total_api_requests=total_metrics.api_requests,
|
||||
total_successful_requests=total_metrics.successful_requests,
|
||||
total_failed_requests=total_metrics.failed_requests,
|
||||
page=page,
|
||||
total_pages=-(-total_count // page_size), # Ceiling division
|
||||
has_more=(page * page_size) < total_count,
|
||||
|
|
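The response metadata above computes `total_pages` with the `-(-a // b)` ceiling-division idiom and derives `has_more` from the page offset. A quick standalone check of both (counts are made up):
```
total_count = 45
page_size = 10
page = 4

total_pages = -(-total_count // page_size)  # ceil(45 / 10) = 5, without importing math
has_more = total_count > page * page_size   # 45 > 40, so a fifth page exists

print(total_pages, has_more)  # 5 True
```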
|
@ -394,7 +394,7 @@ class ModelManagementAuthChecks:
|
|||
|
||||
@staticmethod
|
||||
async def can_user_make_model_call(
|
||||
model_params: Union[Deployment, updateDeployment],
|
||||
model_params: Deployment,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
prisma_client: PrismaClient,
|
||||
premium_user: bool,
|
||||
|
@@ -723,8 +723,38 @@ async def update_model(
            },
        )

+       _model_id = None
+       _model_info = getattr(model_params, "model_info", None)
+       if _model_info is None:
+           raise Exception("model_info not provided")
+
+       _model_id = _model_info.id
+       if _model_id is None:
+           raise Exception("model_info.id not provided")
+
+       _existing_litellm_params = (
+           await prisma_client.db.litellm_proxymodeltable.find_unique(
+               where={"model_id": _model_id}
+           )
+       )
+
+       if _existing_litellm_params is None:
+           if (
+               llm_router is not None
+               and llm_router.get_deployment(model_id=_model_id) is not None
+           ):
+               raise HTTPException(
+                   status_code=400,
+                   detail={
+                       "error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
+                   },
+               )
+           else:
+               raise Exception("model not found")
+       deployment = Deployment(**_existing_litellm_params.model_dump())
+
        await ModelManagementAuthChecks.can_user_make_model_call(
-           model_params=model_params,
+           model_params=deployment,
            user_api_key_dict=user_api_key_dict,
            prisma_client=prisma_client,
            premium_user=premium_user,
@@ -732,31 +762,6 @@ async def update_model(

        # update DB
        if store_model_in_db is True:
-           _model_id = None
-           _model_info = getattr(model_params, "model_info", None)
-           if _model_info is None:
-               raise Exception("model_info not provided")
-
-           _model_id = _model_info.id
-           if _model_id is None:
-               raise Exception("model_info.id not provided")
-           _existing_litellm_params = (
-               await prisma_client.db.litellm_proxymodeltable.find_unique(
-                   where={"model_id": _model_id}
-               )
-           )
-           if _existing_litellm_params is None:
-               if (
-                   llm_router is not None
-                   and llm_router.get_deployment(model_id=_model_id) is not None
-               ):
-                   raise HTTPException(
-                       status_code=400,
-                       detail={
-                           "error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
-                       },
-                   )
-               raise Exception("model not found")
            _existing_litellm_params_dict = dict(
                _existing_litellm_params.litellm_params
            )
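The update_model change above moves the lookup ahead of the auth check: the stored row is resolved by model_info.id, rebuilt as a Deployment, and that deployment (not the raw request body) is what the permission check sees. A rough sketch of the same lookup-then-authorize order, using a hypothetical fake_db and user_is_admin flag in place of Prisma and the real auth checks:

from typing import Optional

fake_db = {"model-123": {"model_name": "gpt-4o", "litellm_params": {"model": "openai/gpt-4o"}}}

def update_model_sketch(model_id: Optional[str], updates: dict, user_is_admin: bool) -> dict:
    if model_id is None:
        raise ValueError("model_info.id not provided")
    existing = fake_db.get(model_id)
    if existing is None:
        raise ValueError("model not found")  # config-only models cannot be edited here
    if not user_is_admin:  # auth check runs against the stored deployment, not the raw request
        raise PermissionError("user cannot edit this model")
    merged = {**existing["litellm_params"], **updates}
    fake_db[model_id]["litellm_params"] = merged
    return fake_db[model_id]

print(update_model_sketch("model-123", {"api_base": "https://example/v1"}, user_is_admin=True))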
@@ -1,15 +1,6 @@
 model_list:
-  - model_name: gpt-4o
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4o
-      api_key: sk-xxxxxxx
-
-mcp_servers:
-  {
-    "zapier_mcp": {
-      "url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
-    },
-    "fetch": {
-      "url": "http://localhost:8000/sse"
-    }
-  }
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
@@ -3308,15 +3308,6 @@ async def model_list(
    tags=["chat/completions"],
    responses={200: {"description": "Successful response"}, **ERROR_RESPONSES},
)  # azure compatible endpoint
-@backoff.on_exception(
-    backoff.expo,
-    Exception,  # base exception to catch for the backoff
-    max_tries=global_max_parallel_request_retries,  # maximum number of retries
-    max_time=global_max_parallel_request_retry_timeout,  # maximum total time to retry for
-    on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=giveup,
-    logger=verbose_proxy_logger,
-)
async def chat_completion(  # noqa: PLR0915
    request: Request,
    fastapi_response: Response,
@@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
  completion_tokens   Int      @default(0)
  spend               Float    @default(0.0)
  api_requests        Int      @default(0)
+ successful_requests Int      @default(0)
+ failed_requests     Int      @default(0)
  created_at          DateTime @default(now())
  updated_at          DateTime @updatedAt

@@ -352,4 +354,3 @@ enum JobStatus {
  INACTIVE
}
-
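With successful_requests and failed_requests added to LiteLLM_DailyUserSpend, increment-style upserts can bump whichever counter applies to the request outcome. A hedged sketch of building such a payload, following the create/update increment pattern used elsewhere in this diff; the surrounding Prisma upsert call and exact field names are assumed:

def daily_spend_upsert_payload(status: str) -> dict:
    # status is "success" or "failure", as returned by get_request_status()
    success = 1 if status == "success" else 0
    failure = 1 - success
    return {
        "create": {"api_requests": 1, "successful_requests": success, "failed_requests": failure},
        "update": {
            "api_requests": {"increment": 1},
            "successful_requests": {"increment": success},
            "failed_requests": {"increment": failure},
        },
    }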
@@ -10,14 +10,24 @@ import traceback
 from datetime import datetime, timedelta
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Union,
+    cast,
+    overload,
+)

 from litellm.proxy._types import (
     DB_CONNECTION_ERROR_TYPES,
     CommonProxyErrors,
     DailyUserSpendTransaction,
     ProxyErrorTypes,
     ProxyException,
     SpendLogsMetadata,
     SpendLogsPayload,
 )
 from litellm.types.guardrails import GuardrailEventHooks
@@ -1100,14 +1110,7 @@ def jsonify_object(data: dict) -> dict:


class PrismaClient:
-    user_list_transactions: dict = {}
-    end_user_list_transactions: dict = {}
-    key_list_transactions: dict = {}
-    team_list_transactions: dict = {}
-    team_member_list_transactions: dict = {}  # key is ["team_id" + "user_id"]
-    org_list_transactions: dict = {}
    spend_log_transactions: List = []
-    daily_user_spend_transactions: Dict[str, DailyUserSpendTransaction] = {}

    def __init__(
        self,
@ -1145,62 +1148,40 @@ class PrismaClient:
|
|||
) # Client to connect to Prisma db
|
||||
verbose_proxy_logger.debug("Success - Created Prisma Client")
|
||||
|
||||
def add_spend_log_transaction_to_daily_user_transaction(
|
||||
def get_request_status(
|
||||
self, payload: Union[dict, SpendLogsPayload]
|
||||
):
|
||||
) -> Literal["success", "failure"]:
|
||||
"""
|
||||
Add a spend log transaction to the daily user transaction list
|
||||
Determine if a request was successful or failed based on payload metadata.
|
||||
|
||||
Key = @@unique([user_id, date, api_key, model, custom_llm_provider]) )
|
||||
Args:
|
||||
payload (Union[dict, SpendLogsPayload]): Request payload containing metadata
|
||||
|
||||
If key exists, update the transaction with the new spend and usage
|
||||
Returns:
|
||||
Literal["success", "failure"]: Request status
|
||||
"""
|
||||
expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"]
|
||||
if not all(key in payload for key in expected_keys):
|
||||
verbose_proxy_logger.debug(
|
||||
f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions"
|
||||
)
|
||||
return
|
||||
|
||||
if isinstance(payload["startTime"], datetime):
|
||||
start_time = payload["startTime"].isoformat()
|
||||
date = start_time.split("T")[0]
|
||||
elif isinstance(payload["startTime"], str):
|
||||
date = payload["startTime"].split("T")[0]
|
||||
else:
|
||||
verbose_proxy_logger.debug(
|
||||
f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions"
|
||||
)
|
||||
return
|
||||
try:
|
||||
daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
|
||||
if daily_transaction_key in self.daily_user_spend_transactions:
|
||||
daily_transaction = self.daily_user_spend_transactions[
|
||||
daily_transaction_key
|
||||
]
|
||||
daily_transaction["spend"] += payload["spend"]
|
||||
daily_transaction["prompt_tokens"] += payload["prompt_tokens"]
|
||||
daily_transaction["completion_tokens"] += payload["completion_tokens"]
|
||||
daily_transaction["api_requests"] += 1
|
||||
else:
|
||||
daily_transaction = DailyUserSpendTransaction(
|
||||
user_id=payload["user"],
|
||||
date=date,
|
||||
api_key=payload["api_key"],
|
||||
model=payload["model"],
|
||||
model_group=payload["model_group"],
|
||||
custom_llm_provider=payload["custom_llm_provider"],
|
||||
prompt_tokens=payload["prompt_tokens"],
|
||||
completion_tokens=payload["completion_tokens"],
|
||||
spend=payload["spend"],
|
||||
api_requests=1,
|
||||
# Get metadata and convert to dict if it's a JSON string
|
||||
payload_metadata: Union[Dict, SpendLogsMetadata, str] = payload.get(
|
||||
"metadata", {}
|
||||
)
|
||||
if isinstance(payload_metadata, str):
|
||||
payload_metadata_json: Union[Dict, SpendLogsMetadata] = cast(
|
||||
Dict, json.loads(payload_metadata)
|
||||
)
|
||||
else:
|
||||
payload_metadata_json = payload_metadata
|
||||
|
||||
self.daily_user_spend_transactions[
|
||||
daily_transaction_key
|
||||
] = daily_transaction
|
||||
except Exception as e:
|
||||
raise e
|
||||
# Check status in metadata dict
|
||||
return (
|
||||
"failure"
|
||||
if payload_metadata_json.get("status") == "failure"
|
||||
else "success"
|
||||
)
|
||||
|
||||
except (json.JSONDecodeError, AttributeError):
|
||||
# Default to success if metadata parsing fails
|
||||
return "success"
|
||||
|
||||
def hash_token(self, token: str):
|
||||
# Hash the string using SHA-256
|
||||
|
@@ -2422,7 +2403,10 @@ def _hash_token_if_needed(token: str) -> str:
class ProxyUpdateSpend:
    @staticmethod
    async def update_end_user_spend(
-       n_retry_times: int, prisma_client: PrismaClient, proxy_logging_obj: ProxyLogging
+       n_retry_times: int,
+       prisma_client: PrismaClient,
+       proxy_logging_obj: ProxyLogging,
+       end_user_list_transactions: Dict[str, float],
    ):
        for i in range(n_retry_times + 1):
            start_time = time.time()
@@ -2434,7 +2418,7 @@ class ProxyUpdateSpend:
                    for (
                        end_user_id,
                        response_cost,
-                   ) in prisma_client.end_user_list_transactions.items():
+                   ) in end_user_list_transactions.items():
                        if litellm.max_end_user_budget is not None:
                            pass
                        batcher.litellm_endusertable.upsert(
@@ -2461,10 +2445,6 @@ class ProxyUpdateSpend:
                _raise_failed_update_spend_exception(
                    e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
                )
-           finally:
-               prisma_client.end_user_list_transactions = (
-                   {}
-               )  # reset the end user list transactions - prevent bad data from causing issues

    @staticmethod
    async def update_spend_logs(
@ -2538,120 +2518,6 @@ class ProxyUpdateSpend:
|
|||
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_daily_user_spend(
|
||||
n_retry_times: int,
|
||||
prisma_client: PrismaClient,
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
):
|
||||
"""
|
||||
Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
|
||||
"""
|
||||
BATCH_SIZE = (
|
||||
100 # Number of aggregated records to update in each database operation
|
||||
)
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
for i in range(n_retry_times + 1):
|
||||
try:
|
||||
# Get transactions to process
|
||||
transactions_to_process = dict(
|
||||
list(prisma_client.daily_user_spend_transactions.items())[
|
||||
:BATCH_SIZE
|
||||
]
|
||||
)
|
||||
|
||||
if len(transactions_to_process) == 0:
|
||||
verbose_proxy_logger.debug(
|
||||
"No new transactions to process for daily spend update"
|
||||
)
|
||||
break
|
||||
|
||||
# Update DailyUserSpend table in batches
|
||||
async with prisma_client.db.batch_() as batcher:
|
||||
for _, transaction in transactions_to_process.items():
|
||||
user_id = transaction.get("user_id")
|
||||
if not user_id: # Skip if no user_id
|
||||
continue
|
||||
|
||||
batcher.litellm_dailyuserspend.upsert(
|
||||
where={
|
||||
"user_id_date_api_key_model_custom_llm_provider": {
|
||||
"user_id": user_id,
|
||||
"date": transaction["date"],
|
||||
"api_key": transaction["api_key"],
|
||||
"model": transaction["model"],
|
||||
"custom_llm_provider": transaction.get(
|
||||
"custom_llm_provider"
|
||||
),
|
||||
}
|
||||
},
|
||||
data={
|
||||
"create": {
|
||||
"user_id": user_id,
|
||||
"date": transaction["date"],
|
||||
"api_key": transaction["api_key"],
|
||||
"model": transaction["model"],
|
||||
"model_group": transaction.get("model_group"),
|
||||
"custom_llm_provider": transaction.get(
|
||||
"custom_llm_provider"
|
||||
),
|
||||
"prompt_tokens": transaction["prompt_tokens"],
|
||||
"completion_tokens": transaction[
|
||||
"completion_tokens"
|
||||
],
|
||||
"spend": transaction["spend"],
|
||||
"api_requests": transaction["api_requests"],
|
||||
},
|
||||
"update": {
|
||||
"prompt_tokens": {
|
||||
"increment": transaction["prompt_tokens"]
|
||||
},
|
||||
"completion_tokens": {
|
||||
"increment": transaction[
|
||||
"completion_tokens"
|
||||
]
|
||||
},
|
||||
"spend": {"increment": transaction["spend"]},
|
||||
"api_requests": {
|
||||
"increment": transaction["api_requests"]
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
verbose_proxy_logger.info(
|
||||
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
|
||||
)
|
||||
|
||||
# Remove processed transactions
|
||||
for key in transactions_to_process.keys():
|
||||
prisma_client.daily_user_spend_transactions.pop(key, None)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
|
||||
)
|
||||
break
|
||||
|
||||
except DB_CONNECTION_ERROR_TYPES as e:
|
||||
if i >= n_retry_times:
|
||||
_raise_failed_update_spend_exception(
|
||||
e=e,
|
||||
start_time=start_time,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
)
|
||||
await asyncio.sleep(2**i) # Exponential backoff
|
||||
|
||||
except Exception as e:
|
||||
# Remove processed transactions even if there was an error
|
||||
if "transactions_to_process" in locals():
|
||||
for key in transactions_to_process.keys(): # type: ignore
|
||||
prisma_client.daily_user_spend_transactions.pop(key, None)
|
||||
_raise_failed_update_spend_exception(
|
||||
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def disable_spend_updates() -> bool:
|
||||
"""
|
||||
|
@@ -2701,20 +2567,6 @@ async def update_spend(  # noqa: PLR0915
            db_writer_client=db_writer_client,
        )

-   ### UPDATE DAILY USER SPEND ###
-   verbose_proxy_logger.debug(
-       "Daily User Spend transactions: {}".format(
-           len(prisma_client.daily_user_spend_transactions)
-       )
-   )
-
-   if len(prisma_client.daily_user_spend_transactions) > 0:
-       await ProxyUpdateSpend.update_daily_user_spend(
-           n_retry_times=n_retry_times,
-           prisma_client=prisma_client,
-           proxy_logging_obj=proxy_logging_obj,
-       )


def _raise_failed_update_spend_exception(
    e: Exception, start_time: float, proxy_logging_obj: ProxyLogging
litellm/types/llms/anthropic_messages/anthropic_response.py (new file, 83 lines)

@@ -0,0 +1,83 @@
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

from typing_extensions import TypeAlias


class AnthropicResponseTextBlock(TypedDict, total=False):
    """
    Anthropic Response Text Block: https://docs.anthropic.com/en/api/messages
    """

    citations: Optional[List[Dict[str, Any]]]
    text: str
    type: Literal["text"]


class AnthropicResponseToolUseBlock(TypedDict, total=False):
    """
    Anthropic Response Tool Use Block: https://docs.anthropic.com/en/api/messages
    """

    id: Optional[str]
    input: Optional[str]
    name: Optional[str]
    type: Literal["tool_use"]


class AnthropicResponseThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    signature: Optional[str]
    thinking: Optional[str]
    type: Literal["thinking"]


class AnthropicResponseRedactedThinkingBlock(TypedDict, total=False):
    """
    Anthropic Response Redacted Thinking Block: https://docs.anthropic.com/en/api/messages
    """

    data: Optional[str]
    type: Literal["redacted_thinking"]


AnthropicResponseContentBlock: TypeAlias = Union[
    AnthropicResponseTextBlock,
    AnthropicResponseToolUseBlock,
    AnthropicResponseThinkingBlock,
    AnthropicResponseRedactedThinkingBlock,
]


class AnthropicUsage(TypedDict, total=False):
    """
    Input and output tokens used in the request
    """

    input_tokens: int
    output_tokens: int

    """
    Cache Tokens Used
    """
    cache_creation_input_tokens: int
    cache_read_input_tokens: int


class AnthropicMessagesResponse(TypedDict, total=False):
    """
    Anthropic Messages API Response: https://docs.anthropic.com/en/api/messages
    """

    content: Optional[List[AnthropicResponseContentBlock]]
    id: str
    model: Optional[str]  # This represents the Model type from Anthropic
    role: Optional[Literal["assistant"]]
    stop_reason: Optional[
        Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
    ]
    stop_sequence: Optional[str]
    type: Optional[Literal["message"]]
    usage: Optional[AnthropicUsage]
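Assuming the new module above is importable at that path, a response payload can be typed against AnthropicMessagesResponse like this (values are illustrative):

from litellm.types.llms.anthropic_messages.anthropic_response import (
    AnthropicMessagesResponse,
)

response: AnthropicMessagesResponse = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet",
    "content": [{"type": "text", "text": "Hello!", "citations": None}],
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "usage": {"input_tokens": 12, "output_tokens": 4},
}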
@@ -1113,3 +1113,6 @@ ResponsesAPIStreamingResponse = Annotated[
    ],
    Discriminator("type"),
]

+
+REASONING_EFFORT = Literal["low", "medium", "high"]

litellm/types/llms/openrouter.py (new file, 9 lines)

@@ -0,0 +1,9 @@
import json
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union


class OpenRouterErrorMessage(TypedDict):
    message: str
    code: int
    metadata: Dict
@@ -5901,9 +5901,10 @@ class ModelResponseIterator:


class ModelResponseListIterator:
-   def __init__(self, model_responses):
+   def __init__(self, model_responses, delay: Optional[float] = None):
        self.model_responses = model_responses
        self.index = 0
+       self.delay = delay

    # Sync iterator
    def __iter__(self):
@@ -5914,6 +5915,8 @@ class ModelResponseListIterator:
            raise StopIteration
        model_response = self.model_responses[self.index]
        self.index += 1
+       if self.delay:
+           time.sleep(self.delay)
        return model_response

    # Async iterator
@@ -5925,6 +5928,8 @@ class ModelResponseListIterator:
            raise StopAsyncIteration
        model_response = self.model_responses[self.index]
        self.index += 1
+       if self.delay:
+           await asyncio.sleep(self.delay)
        return model_response
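The optional delay spaces out replayed chunks, which is what lets the streaming tests later in this diff assert on completion_start_time ordering. A self-contained stand-in showing the same idea (DelayedListIterator is a local sketch, not the LiteLLM class):

import time
from typing import Optional

class DelayedListIterator:  # local stand-in mirroring the patched ModelResponseListIterator
    def __init__(self, items, delay: Optional[float] = None):
        self.items, self.index, self.delay = items, 0, delay

    def __iter__(self):
        return self

    def __next__(self):
        if self.index >= len(self.items):
            raise StopIteration
        item = self.items[self.index]
        self.index += 1
        if self.delay:
            time.sleep(self.delay)  # spaces out chunks so timing assertions are meaningful
        return item

for chunk in DelayedListIterator(["chunk-1", "chunk-2"], delay=0.1):
    print(chunk)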
@ -4453,6 +4453,42 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini-2.5-pro-exp-03-25": {
|
||||
"max_tokens": 65536,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 65536,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_audio_input": true,
|
||||
"supports_video_input": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-pro-exp-02-05": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 2097152,
|
||||
|
@ -10189,6 +10225,22 @@
|
|||
"litellm_provider": "voyage",
|
||||
"mode": "rerank"
|
||||
},
|
||||
"databricks/databricks-claude-3-7-sonnet": {
|
||||
"max_tokens": 200000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.0000025,
|
||||
"input_dbu_cost_per_token": 0.00003571,
|
||||
"output_cost_per_token": 0.00017857,
|
||||
"output_db_cost_per_token": 0.000214286,
|
||||
"litellm_provider": "databricks",
|
||||
"mode": "chat",
|
||||
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
|
||||
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"databricks/databricks-meta-llama-3-1-405b-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
|
@ -10217,7 +10269,7 @@
|
|||
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"databricks/meta-llama-3.3-70b-instruct": {
|
||||
"databricks/databricks-meta-llama-3-3-70b-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
|
|
mypy.ini

@@ -2,6 +2,7 @@
warn_return_any = False
ignore_missing_imports = True
+mypy_path = litellm/stubs
namespace_packages = True

[mypy-google.*]
ignore_missing_imports = True
87
poetry.lock
generated
87
poetry.lock
generated
|
@ -1151,69 +1151,6 @@ files = [
|
|||
[package.extras]
|
||||
protobuf = ["grpcio-tools (>=1.70.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.71.0"
|
||||
description = "HTTP/2-based RPC framework"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509"},
|
||||
{file = "grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444"},
|
||||
{file = "grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a"},
|
||||
{file = "grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb"},
|
||||
{file = "grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c6a0a28450c16809f94e0b5bfe52cabff63e7e4b97b44123ebf77f448534d07d"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:a371e6b6a5379d3692cc4ea1cb92754d2a47bdddeee755d3203d1f84ae08e03e"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:39983a9245d37394fd59de71e88c4b295eb510a3555e0a847d9965088cdbd033"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9182e0063112e55e74ee7584769ec5a0b4f18252c35787f48738627e23a62b97"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693bc706c031aeb848849b9d1c6b63ae6bcc64057984bb91a542332b75aa4c3d"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20e8f653abd5ec606be69540f57289274c9ca503ed38388481e98fa396ed0b41"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8700a2a57771cc43ea295296330daaddc0d93c088f0a35cc969292b6db959bf3"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d35a95f05a8a2cbe8e02be137740138b3b2ea5f80bd004444e4f9a1ffc511e32"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-win32.whl", hash = "sha256:f9c30c464cb2ddfbc2ddf9400287701270fdc0f14be5f08a1e3939f1e749b455"},
|
||||
{file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
|
||||
{file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
protobuf = ["grpcio-tools (>=1.71.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-status"
|
||||
version = "1.70.0"
|
||||
|
@ -1230,22 +1167,6 @@ googleapis-common-protos = ">=1.5.5"
|
|||
grpcio = ">=1.70.0"
|
||||
protobuf = ">=5.26.1,<6.0dev"
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-status"
|
||||
version = "1.71.0"
|
||||
description = "Status proto mapping for gRPC"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
|
||||
{file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
googleapis-common-protos = ">=1.5.5"
|
||||
grpcio = ">=1.71.0"
|
||||
protobuf = ">=5.26.1,<6.0dev"
|
||||
|
||||
[[package]]
|
||||
name = "gunicorn"
|
||||
version = "23.0.0"
|
||||
|
@ -1678,13 +1599,13 @@ referencing = ">=0.31.0"
|
|||
|
||||
[[package]]
|
||||
name = "litellm-proxy-extras"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||
optional = true
|
||||
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
|
||||
files = [
|
||||
{file = "litellm_proxy_extras-0.1.1-py3-none-any.whl", hash = "sha256:2b3c4c5474bacbde2424c1cd13b21f85c65e9c4346f6159badd49a210eedef5c"},
|
||||
{file = "litellm_proxy_extras-0.1.1.tar.gz", hash = "sha256:a1eb911ad2e3742238863d314a8bd6d02dd0cc213ba040b2c0593f132fbf3117"},
|
||||
{file = "litellm_proxy_extras-0.1.2-py3-none-any.whl", hash = "sha256:2caa7bdba5a533cd1781b55e3f7c581138d2a5b68a7e6d737327669dd21d5e08"},
|
||||
{file = "litellm_proxy_extras-0.1.2.tar.gz", hash = "sha256:218e97980ab5a34eed7dcd1564a910c9a790168d672cdec3c464eba9b7cb1518"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4135,4 +4056,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||
content-hash = "16cbf20784776377805f5e33c6bc97dce76303132aa3d81c7e6fe743f0ee3fc1"
|
||||
content-hash = "524b2f8276ba057f8dc8a79dd460c1a243ef4aece7c08a8bf344e029e07b8841"
|
||||
|
|
|
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.65.1"
+version = "1.65.2"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true}
boto3 = {version = "1.34.34", optional = true}
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
mcp = {version = "1.5.0", optional = true, python = ">=3.10"}
-litellm-proxy-extras = {version = "0.1.1", optional = true}
+litellm-proxy-extras = {version = "0.1.2", optional = true}

[tool.poetry.extras]
proxy = [
@@ -117,7 +117,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.65.1"
+version = "1.65.2"
version_files = [
    "pyproject.toml:^version"
]

@@ -38,7 +38,7 @@ sentry_sdk==2.21.0 # for sentry error handling
detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
cryptography==43.0.1
tzdata==2025.1 # IANA time zone database
-litellm-proxy-extras==0.1.1 # for proxy extras - e.g. prisma migrations
+litellm-proxy-extras==0.1.2 # for proxy extras - e.g. prisma migrations

### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env
@@ -327,6 +327,8 @@ model LiteLLM_DailyUserSpend {
  completion_tokens   Int      @default(0)
  spend               Float    @default(0.0)
  api_requests        Int      @default(0)
+ successful_requests Int      @default(0)
+ failed_requests     Int      @default(0)
  created_at          DateTime @default(now())
  updated_at          DateTime @updatedAt

@@ -351,3 +353,4 @@ enum JobStatus {
  ACTIVE
  INACTIVE
}
+
@ -1,6 +1,7 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
@ -19,6 +20,7 @@ from litellm.types.utils import (
|
|||
Delta,
|
||||
ModelResponseStream,
|
||||
PromptTokensDetailsWrapper,
|
||||
StandardLoggingPayload,
|
||||
StreamingChoices,
|
||||
Usage,
|
||||
)
|
||||
|
@ -36,6 +38,22 @@ def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
|
|||
return streaming_handler
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def logging_obj() -> Logging:
|
||||
import time
|
||||
|
||||
logging_obj = Logging(
|
||||
model="my-random-model",
|
||||
messages=[{"role": "user", "content": "Hey"}],
|
||||
stream=True,
|
||||
call_type="completion",
|
||||
start_time=time.time(),
|
||||
litellm_call_id="12345",
|
||||
function_id="1245",
|
||||
)
|
||||
return logging_obj
|
||||
|
||||
|
||||
bedrock_chunks = [
|
||||
ModelResponseStream(
|
||||
id="chatcmpl-d249def8-a78b-464c-87b5-3a6f43565292",
|
||||
|
@ -577,3 +595,36 @@ def test_streaming_handler_with_stop_chunk(
|
|||
**args, model_response=ModelResponseStream()
|
||||
)
|
||||
assert returned_chunk is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_streaming_completion_start_time(logging_obj: Logging):
|
||||
"""Test that the start time is set correctly"""
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
class MockCallback(CustomLogger):
|
||||
pass
|
||||
|
||||
mock_callback = MockCallback()
|
||||
litellm.success_callback = [mock_callback, "langfuse"]
|
||||
|
||||
completion_stream = ModelResponseListIterator(
|
||||
model_responses=bedrock_chunks, delay=0.1
|
||||
)
|
||||
|
||||
response = CustomStreamWrapper(
|
||||
completion_stream=completion_stream,
|
||||
model="bedrock/claude-3-5-sonnet-20240620-v1:0",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
|
||||
await asyncio.sleep(2)
|
||||
|
||||
assert logging_obj.model_call_details["completion_start_time"] is not None
|
||||
assert (
|
||||
logging_obj.model_call_details["completion_start_time"]
|
||||
< logging_obj.model_call_details["end_time"]
|
||||
)
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.llms.openrouter.chat.transformation import (
|
||||
OpenRouterChatCompletionStreamingHandler,
|
||||
OpenRouterException,
|
||||
)
|
||||
|
||||
|
||||
class TestOpenRouterChatCompletionStreamingHandler:
|
||||
def test_chunk_parser_successful(self):
|
||||
handler = OpenRouterChatCompletionStreamingHandler(
|
||||
streaming_response=None, sync_stream=True
|
||||
)
|
||||
|
||||
# Test input chunk
|
||||
chunk = {
|
||||
"id": "test_id",
|
||||
"created": 1234567890,
|
||||
"model": "test_model",
|
||||
"choices": [
|
||||
{"delta": {"content": "test content", "reasoning": "test reasoning"}}
|
||||
],
|
||||
}
|
||||
|
||||
# Parse chunk
|
||||
result = handler.chunk_parser(chunk)
|
||||
|
||||
# Verify response
|
||||
assert result.id == "test_id"
|
||||
assert result.object == "chat.completion.chunk"
|
||||
assert result.created == 1234567890
|
||||
assert result.model == "test_model"
|
||||
assert len(result.choices) == 1
|
||||
assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning"
|
||||
|
||||
def test_chunk_parser_error_response(self):
|
||||
handler = OpenRouterChatCompletionStreamingHandler(
|
||||
streaming_response=None, sync_stream=True
|
||||
)
|
||||
|
||||
# Test error chunk
|
||||
error_chunk = {
|
||||
"error": {
|
||||
"message": "test error",
|
||||
"code": 400,
|
||||
"metadata": {"key": "value"},
|
||||
"user_id": "test_user",
|
||||
}
|
||||
}
|
||||
|
||||
# Verify error handling
|
||||
with pytest.raises(OpenRouterException) as exc_info:
|
||||
handler.chunk_parser(error_chunk)
|
||||
|
||||
assert "Message: test error" in str(exc_info.value)
|
||||
assert exc_info.value.status_code == 400
|
||||
|
||||
def test_chunk_parser_key_error(self):
|
||||
handler = OpenRouterChatCompletionStreamingHandler(
|
||||
streaming_response=None, sync_stream=True
|
||||
)
|
||||
|
||||
# Test invalid chunk missing required fields
|
||||
invalid_chunk = {"incomplete": "data"}
|
||||
|
||||
# Verify KeyError handling
|
||||
with pytest.raises(OpenRouterException) as exc_info:
|
||||
handler.chunk_parser(invalid_chunk)
|
||||
|
||||
assert "KeyError" in str(exc_info.value)
|
||||
assert exc_info.value.status_code == 400
|
97
tests/litellm/llms/sagemaker/test_sagemaker_common_utils.py
Normal file
97
tests/litellm/llms/sagemaker/test_sagemaker_common_utils.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../../.."))
|
||||
from litellm.llms.sagemaker.common_utils import AWSEventStreamDecoder
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_aiter_bytes_unicode_decode_error():
|
||||
"""
|
||||
Test that AWSEventStreamDecoder.aiter_bytes() does not raise an error when encountering invalid UTF-8 bytes. (UnicodeDecodeError)
|
||||
|
||||
|
||||
Ensures stream processing continues despite the error.
|
||||
|
||||
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
|
||||
"""
|
||||
# Create an instance of AWSEventStreamDecoder
|
||||
decoder = AWSEventStreamDecoder(model="test-model")
|
||||
|
||||
# Create a mock event that will trigger a UnicodeDecodeError
|
||||
mock_event = MagicMock()
|
||||
mock_event.to_response_dict.return_value = {
|
||||
"status_code": 200,
|
||||
"headers": {},
|
||||
"body": b"\xff\xfe", # Invalid UTF-8 bytes
|
||||
}
|
||||
|
||||
# Create a mock EventStreamBuffer that yields our mock event
|
||||
mock_buffer = MagicMock()
|
||||
mock_buffer.__iter__.return_value = [mock_event]
|
||||
|
||||
# Mock the EventStreamBuffer class
|
||||
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
|
||||
# Create an async generator that yields some test bytes
|
||||
async def mock_iterator():
|
||||
yield b""
|
||||
|
||||
# Process the stream
|
||||
chunks = []
|
||||
async for chunk in decoder.aiter_bytes(mock_iterator()):
|
||||
if chunk is not None:
|
||||
print("chunk=", chunk)
|
||||
chunks.append(chunk)
|
||||
|
||||
# Verify that processing continued despite the error
|
||||
# The chunks list should be empty since we only sent invalid data
|
||||
assert len(chunks) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_aiter_bytes_valid_chunk_followed_by_unicode_error():
|
||||
"""
|
||||
Test that valid chunks are processed correctly even when followed by Unicode decode errors.
|
||||
This ensures errors don't corrupt or prevent processing of valid data that came before.
|
||||
|
||||
Relevant issue: https://github.com/BerriAI/litellm/issues/9165
|
||||
"""
|
||||
decoder = AWSEventStreamDecoder(model="test-model")
|
||||
|
||||
# Create two mock events - first valid, then invalid
|
||||
mock_valid_event = MagicMock()
|
||||
mock_valid_event.to_response_dict.return_value = {
|
||||
"status_code": 200,
|
||||
"headers": {},
|
||||
"body": json.dumps({"token": {"text": "hello"}}).encode(), # Valid data first
|
||||
}
|
||||
|
||||
mock_invalid_event = MagicMock()
|
||||
mock_invalid_event.to_response_dict.return_value = {
|
||||
"status_code": 200,
|
||||
"headers": {},
|
||||
"body": b"\xff\xfe", # Invalid UTF-8 bytes second
|
||||
}
|
||||
|
||||
# Create a mock EventStreamBuffer that yields valid event first, then invalid
|
||||
mock_buffer = MagicMock()
|
||||
mock_buffer.__iter__.return_value = [mock_valid_event, mock_invalid_event]
|
||||
|
||||
with patch("botocore.eventstream.EventStreamBuffer", return_value=mock_buffer):
|
||||
|
||||
async def mock_iterator():
|
||||
yield b"test_bytes"
|
||||
|
||||
chunks = []
|
||||
async for chunk in decoder.aiter_bytes(mock_iterator()):
|
||||
if chunk is not None:
|
||||
chunks.append(chunk)
|
||||
|
||||
# Verify we got our valid chunk despite the subsequent error
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0]["text"] == "hello" # Verify the content of the valid chunk
|
|
@ -1,137 +0,0 @@
|
|||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||
|
||||
|
||||
def test_anthropic_prompt_caching_headers_for_vertex():
|
||||
"""
|
||||
Test that the prompt caching beta header is correctly set for Vertex AI requests
|
||||
with Anthropic models when cache control is present in the messages.
|
||||
"""
|
||||
# Create an instance of AnthropicConfig
|
||||
config = AnthropicConfig()
|
||||
|
||||
# Test case 1: Vertex request with prompt caching
|
||||
# Create a message with cache control
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant.",
|
||||
"cache_control": {"type": "ephemeral"}
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Tell me about the solar system."
|
||||
}
|
||||
]
|
||||
|
||||
# Check if cache control is detected
|
||||
is_cache_control_set = config.is_cache_control_set(messages=messages)
|
||||
assert is_cache_control_set is True, "Cache control should be detected in messages"
|
||||
|
||||
# Generate headers for a Vertex AI request with prompt caching
|
||||
headers = config.get_anthropic_headers(
|
||||
api_key="test-api-key",
|
||||
prompt_caching_set=is_cache_control_set,
|
||||
is_vertex_request=True
|
||||
)
|
||||
|
||||
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
|
||||
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
|
||||
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
|
||||
|
||||
# Test case 2: Vertex request without prompt caching
|
||||
messages_without_cache = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Tell me about the solar system."
|
||||
}
|
||||
]
|
||||
|
||||
# Check if cache control is detected
|
||||
is_cache_control_set = config.is_cache_control_set(messages=messages_without_cache)
|
||||
assert is_cache_control_set is False, "Cache control should not be detected in messages"
|
||||
|
||||
# Generate headers for a Vertex AI request without prompt caching
|
||||
headers = config.get_anthropic_headers(
|
||||
api_key="test-api-key",
|
||||
prompt_caching_set=is_cache_control_set,
|
||||
is_vertex_request=True
|
||||
)
|
||||
|
||||
# Verify that the anthropic-beta header is not set
|
||||
assert "anthropic-beta" not in headers, "anthropic-beta header should not be present"
|
||||
|
||||
|
||||
def test_anthropic_prompt_caching_with_content_blocks():
|
||||
"""
|
||||
Test that prompt caching is correctly detected when cache control is in content blocks.
|
||||
"""
|
||||
config = AnthropicConfig()
|
||||
|
||||
# Message with cache control in content blocks
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "You are a helpful assistant.",
|
||||
"cache_control": {"type": "ephemeral"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Tell me about the solar system."
|
||||
}
|
||||
]
|
||||
|
||||
# Check if cache control is detected
|
||||
is_cache_control_set = config.is_cache_control_set(messages=messages)
|
||||
assert is_cache_control_set is True, "Cache control should be detected in content blocks"
|
||||
|
||||
# Generate headers for a Vertex AI request with prompt caching
|
||||
headers = config.get_anthropic_headers(
|
||||
api_key="test-api-key",
|
||||
prompt_caching_set=is_cache_control_set,
|
||||
is_vertex_request=True
|
||||
)
|
||||
|
||||
# Verify that the anthropic-beta header is set with prompt-caching-2024-07-31
|
||||
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
|
||||
assert "prompt-caching-2024-07-31" in headers["anthropic-beta"], "prompt-caching-2024-07-31 should be in the beta header"
|
||||
|
||||
|
||||
def test_anthropic_vertex_other_beta_headers():
|
||||
"""
|
||||
Test that other beta headers are not included for Vertex AI requests.
|
||||
"""
|
||||
config = AnthropicConfig()
|
||||
|
||||
# Generate headers with multiple beta features
|
||||
headers = config.get_anthropic_headers(
|
||||
api_key="test-api-key",
|
||||
prompt_caching_set=True,
|
||||
computer_tool_used=True, # This should be excluded for Vertex
|
||||
pdf_used=True, # This should be excluded for Vertex
|
||||
is_vertex_request=True
|
||||
)
|
||||
|
||||
# Verify that only prompt-caching is included in the beta header
|
||||
assert "anthropic-beta" in headers, "anthropic-beta header should be present"
|
||||
assert headers["anthropic-beta"] == "prompt-caching-2024-07-31", "Only prompt-caching should be in the beta header"
|
||||
assert "computer-use-2024-10-22" not in headers["anthropic-beta"], "computer-use beta should not be included"
|
||||
assert "pdfs-2024-09-25" not in headers["anthropic-beta"], "pdfs beta should not be included"
|
|
@ -39,7 +39,7 @@ async def test_request_body_caching():
|
|||
result1 = await _read_request_body(mock_request)
|
||||
assert result1 == test_data
|
||||
assert "parsed_body" in mock_request.scope
|
||||
assert mock_request.scope["parsed_body"] == test_data
|
||||
assert mock_request.scope["parsed_body"] == (("key",), {"key": "value"})
|
||||
|
||||
# Verify the body was read once
|
||||
mock_request.body.assert_called_once()
|
||||
|
@ -49,7 +49,7 @@ async def test_request_body_caching():
|
|||
|
||||
# Second call should use the cached body
|
||||
result2 = await _read_request_body(mock_request)
|
||||
assert result2 == test_data
|
||||
assert result2 == {"key": "value"}
|
||||
|
||||
# Verify the body was not read again
|
||||
mock_request.body.assert_not_called()
|
||||
|
@ -75,7 +75,10 @@ async def test_form_data_parsing():
|
|||
# Verify the form data was correctly parsed
|
||||
assert result == test_data
|
||||
assert "parsed_body" in mock_request.scope
|
||||
assert mock_request.scope["parsed_body"] == test_data
|
||||
assert mock_request.scope["parsed_body"] == (
|
||||
("name", "message"),
|
||||
{"name": "test_user", "message": "hello world"},
|
||||
)
|
||||
|
||||
# Verify form() was called
|
||||
mock_request.form.assert_called_once()
|
||||
|
@ -101,7 +104,46 @@ async def test_empty_request_body():
|
|||
# Verify an empty dict is returned
|
||||
assert result == {}
|
||||
assert "parsed_body" in mock_request.scope
|
||||
assert mock_request.scope["parsed_body"] == {}
|
||||
assert mock_request.scope["parsed_body"] == ((), {})
|
||||
|
||||
# Verify the body was read
|
||||
mock_request.body.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_circular_reference_handling():
|
||||
"""
|
||||
Test that cached request body isn't modified when the returned result is modified.
|
||||
Demonstrates the mutable dictionary reference issue.
|
||||
"""
|
||||
# Create a mock request with initial data
|
||||
mock_request = MagicMock()
|
||||
initial_body = {
|
||||
"model": "gpt-4",
|
||||
"messages": [{"role": "user", "content": "Hello"}],
|
||||
}
|
||||
|
||||
mock_request.body = AsyncMock(return_value=orjson.dumps(initial_body))
|
||||
mock_request.headers = {"content-type": "application/json"}
|
||||
mock_request.scope = {}
|
||||
|
||||
# First parse
|
||||
result = await _read_request_body(mock_request)
|
||||
|
||||
# Verify initial parse
|
||||
assert result["model"] == "gpt-4"
|
||||
assert result["messages"] == [{"role": "user", "content": "Hello"}]
|
||||
|
||||
# Modify the result by adding proxy_server_request
|
||||
result["proxy_server_request"] = {
|
||||
"url": "http://0.0.0.0:4000/v1/chat/completions",
|
||||
"method": "POST",
|
||||
"headers": {"content-type": "application/json"},
|
||||
"body": result, # Creates circular reference
|
||||
}
|
||||
|
||||
# Second parse using the same request - will use the modified cached value
|
||||
result2 = await _read_request_body(mock_request)
|
||||
assert (
|
||||
"proxy_server_request" not in result2
|
||||
) # This will pass, showing the cache pollution
|
||||
|
|
|
@ -0,0 +1,264 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from litellm.proxy._types import (
|
||||
DailyUserSpendTransaction,
|
||||
Litellm_EntityType,
|
||||
SpendUpdateQueueItem,
|
||||
)
|
||||
from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
|
||||
DailySpendUpdateQueue,
|
||||
)
|
||||
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def daily_spend_update_queue():
|
||||
return DailySpendUpdateQueue()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_queue_flush(daily_spend_update_queue):
|
||||
"""Test flushing an empty queue returns an empty list"""
|
||||
result = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
|
||||
assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_single_update(daily_spend_update_queue):
|
||||
"""Test adding a single update to the queue"""
|
||||
test_key = "user1_2023-01-01_key123_gpt-4_openai"
|
||||
test_transaction = {
|
||||
"spend": 10.0,
|
||||
"prompt_tokens": 100,
|
||||
"completion_tokens": 50,
|
||||
"api_requests": 1,
|
||||
"successful_requests": 1,
|
||||
"failed_requests": 0,
|
||||
}
|
||||
|
||||
# Add update to queue
|
||||
await daily_spend_update_queue.add_update({test_key: test_transaction})
|
||||
|
||||
# Flush and check
|
||||
updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
|
||||
assert len(updates) == 1
|
||||
assert test_key in updates[0]
|
||||
assert updates[0][test_key] == test_transaction
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_multiple_updates(daily_spend_update_queue):
|
||||
"""Test adding multiple updates to the queue"""
|
||||
test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
|
||||
test_transaction1 = {
|
||||
"spend": 10.0,
|
||||
"prompt_tokens": 100,
|
||||
"completion_tokens": 50,
|
||||
"api_requests": 1,
|
||||
"successful_requests": 1,
|
||||
"failed_requests": 0,
|
||||
}
|
||||
|
||||
test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
|
||||
test_transaction2 = {
|
||||
"spend": 5.0,
|
||||
"prompt_tokens": 200,
|
||||
"completion_tokens": 30,
|
||||
"api_requests": 1,
|
||||
"successful_requests": 1,
|
||||
"failed_requests": 0,
|
||||
}
|
||||
|
||||
# Add updates to queue
|
||||
await daily_spend_update_queue.add_update({test_key1: test_transaction1})
    await daily_spend_update_queue.add_update({test_key2: test_transaction2})

    # Flush and check
    updates = await daily_spend_update_queue.flush_all_updates_from_in_memory_queue()
    assert len(updates) == 2

    # Find each transaction in the list of updates
    found_transaction1 = False
    found_transaction2 = False

    for update in updates:
        if test_key1 in update:
            assert update[test_key1] == test_transaction1
            found_transaction1 = True
        if test_key2 in update:
            assert update[test_key2] == test_transaction2
            found_transaction2 = True

    assert found_transaction1
    assert found_transaction2


@pytest.mark.asyncio
async def test_aggregated_daily_spend_update_empty(daily_spend_update_queue):
    """Test aggregating updates from an empty queue"""
    result = (
        await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
    )
    assert result == {}


@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_single_key():
    """Test static method for aggregating a single key"""
    test_key = "user1_2023-01-01_key123_gpt-4_openai"
    test_transaction = {
        "spend": 10.0,
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    updates = [{test_key: test_transaction}]

    # Test aggregation
    result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
        updates
    )

    assert len(result) == 1
    assert test_key in result
    assert result[test_key] == test_transaction


@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_multiple_keys():
    """Test static method for aggregating multiple different keys"""
    test_key1 = "user1_2023-01-01_key123_gpt-4_openai"
    test_transaction1 = {
        "spend": 10.0,
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    test_key2 = "user2_2023-01-01_key456_gpt-3.5-turbo_openai"
    test_transaction2 = {
        "spend": 5.0,
        "prompt_tokens": 200,
        "completion_tokens": 30,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    updates = [{test_key1: test_transaction1}, {test_key2: test_transaction2}]

    # Test aggregation
    result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
        updates
    )

    assert len(result) == 2
    assert test_key1 in result
    assert test_key2 in result
    assert result[test_key1] == test_transaction1
    assert result[test_key2] == test_transaction2


@pytest.mark.asyncio
async def test_get_aggregated_daily_spend_update_transactions_same_key():
    """Test static method for aggregating updates with the same key"""
    test_key = "user1_2023-01-01_key123_gpt-4_openai"
    test_transaction1 = {
        "spend": 10.0,
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    test_transaction2 = {
        "spend": 5.0,
        "prompt_tokens": 200,
        "completion_tokens": 30,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    expected_transaction = {
        "spend": 15.0, # 10 + 5
        "prompt_tokens": 300, # 100 + 200
        "completion_tokens": 80, # 50 + 30
        "api_requests": 2, # 1 + 1
        "successful_requests": 2, # 1 + 1
        "failed_requests": 0, # 0 + 0
    }

    updates = [{test_key: test_transaction1}, {test_key: test_transaction2}]

    # Test aggregation
    result = DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions(
        updates
    )

    assert len(result) == 1
    assert test_key in result
    assert result[test_key] == expected_transaction
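

# Illustrative sketch of the aggregation contract exercised by the three tests above:
# `updates` is a list of {key: transaction} dicts, and transactions that share a key are
# merged by summing their numeric fields. The helper name below is hypothetical; the
# real logic lives in DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions
# and may differ in detail.
def _example_aggregate_daily_spend_updates(updates):
    aggregated = {}
    for update in updates:
        for key, transaction in update.items():
            if key not in aggregated:
                # first transaction seen for this key: copy it as-is
                aggregated[key] = dict(transaction)
            else:
                # same key seen again: sum each numeric field (spend, tokens, request counts)
                for field, value in transaction.items():
                    aggregated[key][field] = aggregated[key].get(field, 0) + value
    return aggregated

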
@pytest.mark.asyncio
async def test_flush_and_get_aggregated_daily_spend_update_transactions(
    daily_spend_update_queue,
):
    """Test the full workflow of adding, flushing, and aggregating updates"""
    test_key = "user1_2023-01-01_key123_gpt-4_openai"
    test_transaction1 = {
        "spend": 10.0,
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    test_transaction2 = {
        "spend": 5.0,
        "prompt_tokens": 200,
        "completion_tokens": 30,
        "api_requests": 1,
        "successful_requests": 1,
        "failed_requests": 0,
    }

    expected_transaction = {
        "spend": 15.0, # 10 + 5
        "prompt_tokens": 300, # 100 + 200
        "completion_tokens": 80, # 50 + 30
        "api_requests": 2, # 1 + 1
        "successful_requests": 2, # 1 + 1
        "failed_requests": 0, # 0 + 0
    }

    # Add updates to queue
    await daily_spend_update_queue.add_update({test_key: test_transaction1})
    await daily_spend_update_queue.add_update({test_key: test_transaction2})

    # Test full workflow
    result = (
        await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
    )

    assert len(result) == 1
    assert test_key in result
    assert result[test_key] == expected_transaction

@@ -12,7 +12,7 @@ sys.path.insert(
) # Adds the parent directory to the system path

from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
from litellm.proxy.db.pod_lock_manager import PodLockManager
from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager


# Mock Prisma client class

@@ -0,0 +1,152 @@
import asyncio
import json
import os
import sys

import pytest
from fastapi.testclient import TestClient

from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue

sys.path.insert(
    0, os.path.abspath("../../..")
) # Adds the parent directory to the system path


@pytest.fixture
def spend_queue():
    return SpendUpdateQueue()


@pytest.mark.asyncio
async def test_add_update(spend_queue):
    # Test adding a single update
    update: SpendUpdateQueueItem = {
        "entity_type": Litellm_EntityType.USER,
        "entity_id": "user123",
        "response_cost": 0.5,
    }
    await spend_queue.add_update(update)

    # Verify update was added by checking queue size
    assert spend_queue.update_queue.qsize() == 1


@pytest.mark.asyncio
async def test_missing_response_cost(spend_queue):
    # Test with missing response_cost - should default to 0
    update: SpendUpdateQueueItem = {
        "entity_type": Litellm_EntityType.USER,
        "entity_id": "user123",
    }

    await spend_queue.add_update(update)
    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Should have created entry with 0 cost
    assert aggregated["user_list_transactions"]["user123"] == 0


@pytest.mark.asyncio
async def test_missing_entity_id(spend_queue):
    # Test with missing entity_id - should default to empty string
    update: SpendUpdateQueueItem = {
        "entity_type": Litellm_EntityType.USER,
        "response_cost": 1.0,
    }

    await spend_queue.add_update(update)
    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Should use empty string as key
    assert aggregated["user_list_transactions"][""] == 1.0


@pytest.mark.asyncio
async def test_none_values(spend_queue):
    # Test with None values
    update: SpendUpdateQueueItem = {
        "entity_type": Litellm_EntityType.USER,
        "entity_id": None, # type: ignore
        "response_cost": None,
    }

    await spend_queue.add_update(update)
    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Should handle None values gracefully
    assert aggregated["user_list_transactions"][""] == 0


@pytest.mark.asyncio
async def test_multiple_updates_with_missing_fields(spend_queue):
    # Test multiple updates with various missing fields
    updates: list[SpendUpdateQueueItem] = [
        {
            "entity_type": Litellm_EntityType.USER,
            "entity_id": "user123",
            "response_cost": 0.5,
        },
        {
            "entity_type": Litellm_EntityType.USER,
            "entity_id": "user123", # missing response_cost
        },
        {
            "entity_type": Litellm_EntityType.USER, # missing entity_id
            "response_cost": 1.5,
        },
    ]

    for update in updates:
        await spend_queue.add_update(update)

    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Verify aggregation
    assert (
        aggregated["user_list_transactions"]["user123"] == 0.5
    ) # only the first update with valid cost
    assert (
        aggregated["user_list_transactions"][""] == 1.5
    ) # update with missing entity_id


@pytest.mark.asyncio
async def test_unknown_entity_type(spend_queue):
    # Test with unknown entity type
    update: SpendUpdateQueueItem = {
        "entity_type": "UNKNOWN_TYPE", # type: ignore
        "entity_id": "123",
        "response_cost": 0.5,
    }

    await spend_queue.add_update(update)
    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Should ignore unknown entity type
    assert all(len(transactions) == 0 for transactions in aggregated.values())


@pytest.mark.asyncio
async def test_missing_entity_type(spend_queue):
    # Test with missing entity type
    update: SpendUpdateQueueItem = {"entity_id": "123", "response_cost": 0.5}

    await spend_queue.add_update(update)
    aggregated = (
        await spend_queue.flush_and_get_aggregated_db_spend_update_transactions()
    )

    # Should ignore updates without entity type
    assert all(len(transactions) == 0 for transactions in aggregated.values())
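

# Illustrative sketch of the behavior the tests above pin down (the helper name is
# hypothetical; the real logic lives in SpendUpdateQueue): queued items are routed by
# entity_type into per-entity transaction dicts, a missing entity_id falls back to "",
# a missing or None response_cost falls back to 0, costs for the same entity are summed,
# and items with an unknown or missing entity_type are ignored.
def _example_aggregate_spend_updates(items):
    aggregated = {"user_list_transactions": {}}  # one dict per known entity type
    routing = {Litellm_EntityType.USER: "user_list_transactions"}
    for item in items:
        bucket_name = routing.get(item.get("entity_type"))
        if bucket_name is None:
            continue  # unknown or missing entity type: drop the update
        entity_id = item.get("entity_id") or ""
        cost = item.get("response_cost") or 0
        bucket = aggregated[bucket_name]
        bucket[entity_id] = bucket.get(entity_id, 0) + cost
    return aggregated

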
@@ -55,3 +55,30 @@ async def test_ui_view_users_with_null_email(mocker, caplog):
    assert response == [
        LiteLLM_UserTableFiltered(user_id="test-user-null-email", user_email=None)
    ]


def test_user_daily_activity_types():
    """
    Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
    """
    from litellm.proxy.management_endpoints.internal_user_endpoints import (
        DailySpendMetadata,
        SpendMetrics,
    )

    # Create a SpendMetrics instance
    spend_metrics = SpendMetrics()

    # Create a DailySpendMetadata instance
    daily_spend_metadata = DailySpendMetadata()

    # Assert all fields in SpendMetrics are reported in DailySpendMetadata as "total_"
    for field in spend_metrics.__dict__:
        if field.startswith("total_"):
            assert hasattr(
                daily_spend_metadata, field
            ), f"Field {field} is not reported in DailySpendMetadata"
        else:
            assert not hasattr(
                daily_spend_metadata, field
            ), f"Field {field} is reported in DailySpendMetadata"

tests/litellm/proxy/test_litellm_pre_call_utils.py (new file, 105 lines)

@@ -0,0 +1,105 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch

import pytest

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import (
    _get_enforced_params,
    check_if_token_is_service_account,
)

sys.path.insert(
    0, os.path.abspath("../../..")
) # Adds the parent directory to the system path


def test_check_if_token_is_service_account():
    """
    Test that only keys with `service_account_id` in metadata are considered service accounts
    """
    # Test case 1: Service account token
    service_account_token = UserAPIKeyAuth(
        api_key="test-key", metadata={"service_account_id": "test-service-account"}
    )
    assert check_if_token_is_service_account(service_account_token) == True

    # Test case 2: Regular user token
    regular_token = UserAPIKeyAuth(api_key="test-key", metadata={})
    assert check_if_token_is_service_account(regular_token) == False

    # Test case 3: Token with other metadata
    other_metadata_token = UserAPIKeyAuth(
        api_key="test-key", metadata={"user_id": "test-user"}
    )
    assert check_if_token_is_service_account(other_metadata_token) == False
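

# Minimal sketch of the rule this test pins down (illustrative only; the real helper is
# check_if_token_is_service_account in litellm.proxy.litellm_pre_call_utils): a key is a
# service account if and only if its metadata carries a "service_account_id" entry.
def _example_is_service_account(user_api_key_dict):
    metadata = user_api_key_dict.metadata or {}
    return metadata.get("service_account_id") is not None

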
def test_get_enforced_params_for_service_account_settings():
    """
    Test that service account enforced params are only added to service account keys
    """
    service_account_token = UserAPIKeyAuth(
        api_key="test-key", metadata={"service_account_id": "test-service-account"}
    )
    general_settings_with_service_account_settings = {
        "service_account_settings": {"enforced_params": ["metadata.service"]},
    }
    result = _get_enforced_params(
        general_settings=general_settings_with_service_account_settings,
        user_api_key_dict=service_account_token,
    )
    assert result == ["metadata.service"]

    regular_token = UserAPIKeyAuth(
        api_key="test-key", metadata={"enforced_params": ["user"]}
    )
    result = _get_enforced_params(
        general_settings=general_settings_with_service_account_settings,
        user_api_key_dict=regular_token,
    )
    assert result == ["user"]


@pytest.mark.parametrize(
    "general_settings, user_api_key_dict, expected_enforced_params",
    [
        (
            {"enforced_params": ["param1", "param2"]},
            UserAPIKeyAuth(
                api_key="test_api_key", user_id="test_user_id", org_id="test_org_id"
            ),
            ["param1", "param2"],
        ),
        (
            {"service_account_settings": {"enforced_params": ["param1", "param2"]}},
            UserAPIKeyAuth(
                api_key="test_api_key",
                user_id="test_user_id",
                org_id="test_org_id",
                metadata={"service_account_id": "test_service_account_id"},
            ),
            ["param1", "param2"],
        ),
        (
            {"service_account_settings": {"enforced_params": ["param1", "param2"]}},
            UserAPIKeyAuth(
                api_key="test_api_key",
                metadata={
                    "enforced_params": ["param3", "param4"],
                    "service_account_id": "test_service_account_id",
                },
            ),
            ["param1", "param2", "param3", "param4"],
        ),
    ],
)
def test_get_enforced_params(
    general_settings, user_api_key_dict, expected_enforced_params
):
    from litellm.proxy.litellm_pre_call_utils import _get_enforced_params

    enforced_params = _get_enforced_params(general_settings, user_api_key_dict)
    assert enforced_params == expected_enforced_params
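

# Sketch of the combination rule the cases above encode (inferred from the expected
# outputs, not copied from litellm_pre_call_utils): enforced params are concatenated from
# up to three sources, in order: proxy-wide general_settings["enforced_params"], then
# service_account_settings["enforced_params"] when the key is a service account, then the
# key's own metadata["enforced_params"].
def _example_enforced_params(general_settings, user_api_key_dict):
    enforced = list(general_settings.get("enforced_params") or [])
    metadata = user_api_key_dict.metadata or {}
    if metadata.get("service_account_id") is not None:
        service_account_settings = general_settings.get("service_account_settings") or {}
        enforced.extend(service_account_settings.get("enforced_params") or [])
    enforced.extend(metadata.get("enforced_params") or [])
    return enforced

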
@@ -198,6 +198,42 @@ class BaseLLMChatTest(ABC):
            messages=image_messages,
        )
        assert response is not None

    def test_file_data_unit_test(self, pdf_messages):
        from litellm.utils import supports_pdf_input, return_raw_request
        from litellm.types.utils import CallTypes
        from litellm.litellm_core_utils.prompt_templates.factory import convert_to_anthropic_image_obj

        media_chunk = convert_to_anthropic_image_obj(
            openai_image_url=pdf_messages,
            format=None,
        )

        file_content = [
            {"type": "text", "text": "What's this file about?"},
            {
                "type": "file",
                "file": {
                    "file_data": pdf_messages,
                }
            },
        ]

        image_messages = [{"role": "user", "content": file_content}]

        base_completion_call_args = self.get_base_completion_call_args()

        if not supports_pdf_input(base_completion_call_args["model"], None):
            pytest.skip("Model does not support image input")

        raw_request = return_raw_request(
            endpoint=CallTypes.completion,
            kwargs={**base_completion_call_args, "messages": image_messages},
        )

        print("RAW REQUEST", raw_request)

        assert media_chunk["data"] in json.dumps(raw_request)

    def test_message_with_name(self):
        try:
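

# Hypothetical usage sketch of the message shape exercised by test_file_data_unit_test
# above. The model id and the base64 payload are placeholders and the call needs valid
# provider credentials; it is not part of this diff. A PDF is sent OpenAI-style as a
# "file" content block whose "file_data" is a data URL, next to ordinary text content.
import litellm


def _example_pdf_completion(pdf_data_url: str):
    # pdf_data_url is expected to look like "data:application/pdf;base64,<...>"
    return litellm.completion(
        model="gpt-4o",  # assumes a model for which supports_pdf_input() returns True
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's this file about?"},
                    {"type": "file", "file": {"file_data": pdf_data_url}},
                ],
            }
        ],
    )

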
@@ -268,7 +268,7 @@ async def test_vision_with_custom_model():
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABDElEQVRYhe2SzWqEMBRGPyQTfQxJsc5jBKGzFmlslyFIZxsCQ7sUaWd87EanpdpIrbtC71mE/NyTm9wEIAiCIAiC+N/otQBxU2Sf/aeh4enqptHXri+/yxIq63jlKCw6cXssnr3ObdzdGYFYCJ2IzHKXLygHXCB98Gm4DE+ZZemu5EisQSyZTmyg+AuzQbkezCuIy7EI0k9Ig3FtruwydY+qniqtV5yQyo8qpUIl2fc90KVzJWohWf2qu75vlw52rdfjVDHg8vLWwixW7PChqLkSyUadwfSS0uQZhEvRuIkS53uJvrK8cGWYaPwpGt8efvw+vlo8TPMzcmP8w7lrNypc1RsNgiAIgiD+Iu/RyDYhCaWrgQAAAABJRU5ErkJggg=="
},
},
],

@@ -1379,3 +1379,20 @@ def test_azure_modalities_param():
    )
    assert optional_params["modalities"] == ["text", "audio"]
    assert optional_params["audio"] == {"type": "audio_input", "input": "test.wav"}


@pytest.mark.parametrize(
    "model, provider",
    [
        ("claude-3-7-sonnet-20240620-v1:0", "anthropic"),
        ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
        ("invoke/anthropic.claude-3-7-sonnet-20240620-v1:0", "bedrock"),
        ("claude-3-7-sonnet@20250219", "vertex_ai"),
    ],
)
def test_anthropic_unified_reasoning_content(model, provider):
    optional_params = get_optional_params(
        model=model,
        custom_llm_provider=provider,
        reasoning_effort="high",
    )
    assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}
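

# Usage sketch for the mapping asserted above (model id is illustrative and the call
# needs valid credentials; not part of this diff): passing reasoning_effort="high" to an
# Anthropic-family model is translated by litellm into the provider's "thinking" block
# with a token budget, so callers do not set budget_tokens themselves.
import litellm


def _example_reasoning_effort_call():
    return litellm.completion(
        model="anthropic/claude-3-7-sonnet-20250219",  # assumed model id for illustration
        messages=[{"role": "user", "content": "Explain the Monty Hall problem."}],
        reasoning_effort="high",  # mapped to {"type": "enabled", "budget_tokens": 4096}
    )

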
Some files were not shown because too many files have changed in this diff.