Compare commits


1 commit

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Ishaan Jaff | d69552718d | fix latency issues on google ai studio | 2024-11-21 08:34:04 -08:00 |
259 changed files with 4018 additions and 11086 deletions


@@ -625,48 +625,6 @@ jobs:
paths:
- llm_translation_coverage.xml
- llm_translation_coverage
pass_through_unit_testing:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
steps:
- checkout
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-cov==5.0.0"
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.21.1"
# Run pytest and generate JUnit XML report
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/pass_through_unit_tests --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml pass_through_unit_tests_coverage.xml
mv .coverage pass_through_unit_tests_coverage
# Store test results
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- pass_through_unit_tests_coverage.xml
- pass_through_unit_tests_coverage
image_gen_testing:
docker:
- image: cimg/python:3.11
@@ -807,14 +765,12 @@ jobs:
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
- run: ruff check ./litellm
# - run: python ./tests/documentation_tests/test_general_setting_keys.py
- run: python ./tests/documentation_tests/test_general_setting_keys.py
- run: python ./tests/code_coverage_tests/router_code_coverage.py
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
- run: python ./tests/documentation_tests/test_env_keys.py
- run: python ./tests/documentation_tests/test_router_settings.py
- run: python ./tests/documentation_tests/test_api_docs.py
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
- run: helm lint ./deploy/charts/litellm-helm
db_migration_disable_update_check:
@@ -966,7 +922,7 @@ jobs:
command: |
pwd
ls
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests
no_output_timeout: 120m
# Store test results
@@ -1180,7 +1136,6 @@ jobs:
pip install "PyGithub==1.59.1"
pip install "google-cloud-aiplatform==1.59.0"
pip install anthropic
# Run pytest and generate JUnit XML report
- run:
name: Build Docker image
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
@@ -1192,7 +1147,6 @@ jobs:
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e GEMINI_API_KEY=$GEMINI_API_KEY \
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
-e LITELLM_LICENSE=$LITELLM_LICENSE \
--name my-app \
@@ -1217,27 +1171,6 @@ jobs:
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 5m
# New steps to run Node.js test
- run:
name: Install Node.js
command: |
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
sudo apt-get install -y nodejs
node --version
npm --version
- run:
name: Install Node.js dependencies
command: |
npm install @google-cloud/vertexai
npm install @google/generative-ai
npm install --save-dev jest
- run:
name: Run Vertex AI, Google AI Studio Node.js tests
command: |
npx jest tests/pass_through_tests --verbose
no_output_timeout: 30m
- run:
name: Run tests
command: |
@@ -1245,6 +1178,7 @@ jobs:
ls
python -m pytest -vv tests/pass_through_tests/ -x --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
# Store test results
- store_test_results:
path: test-results
@@ -1270,7 +1204,7 @@ jobs:
python -m venv venv
. venv/bin/activate
pip install coverage
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage
coverage xml
- codecov/upload:
file: ./coverage.xml
@@ -1376,7 +1310,6 @@ jobs:
name: Install Dependencies
command: |
npm install -D @playwright/test
npm install @google-cloud/vertexai
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
@@ -1408,7 +1341,7 @@ jobs:
command: |
docker run -d \
-p 4000:4000 \
-e DATABASE_URL=$PROXY_DATABASE_URL_2 \
-e DATABASE_URL=$PROXY_DATABASE_URL \
-e LITELLM_MASTER_KEY="sk-1234" \
-e OPENAI_API_KEY=$OPENAI_API_KEY \
-e UI_USERNAME="admin" \
@@ -1438,7 +1371,7 @@ jobs:
- run:
name: Run Playwright Tests
command: |
npx playwright test e2e_ui_tests/ --reporter=html --output=test-results
npx playwright test --reporter=html --output=test-results
no_output_timeout: 120m
- store_test_results:
path: test-results
@@ -1560,12 +1493,6 @@ workflows:
only:
- main
- /litellm_.*/
- pass_through_unit_testing:
filters:
branches:
only:
- main
- /litellm_.*/
- image_gen_testing:
filters:
branches:
@@ -1581,7 +1508,6 @@ workflows:
- upload-coverage:
requires:
- llm_translation_testing
- pass_through_unit_testing
- image_gen_testing
- logging_testing
- litellm_router_testing
@@ -1622,7 +1548,6 @@ workflows:
- load_testing
- test_bad_database_url
- llm_translation_testing
- pass_through_unit_testing
- image_gen_testing
- logging_testing
- litellm_router_testing


@@ -41,7 +41,7 @@ Use `litellm.get_supported_openai_params()` for an updated list of params for ea
| Provider | temperature | max_completion_tokens | max_tokens | top_p | stream | stream_options | stop | n | presence_penalty | frequency_penalty | functions | function_call | logit_bias | user | response_format | seed | tools | tool_choice | logprobs | top_logprobs | extra_headers |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|Anthropic| ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | | | | | | |✅ | ✅ | | ✅ | ✅ | | | ✅ |
|Anthropic| ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | | | | | | |✅ | ✅ | | ✅ | ✅ | | | ✅ |
|OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ |
|Azure OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | | | ✅ |
|Replicate | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
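
As a quick programmatic check, here is a minimal sketch using the `litellm.get_supported_openai_params()` helper referenced above (the model name is only an example):

```python
import litellm

# Returns the list of OpenAI params litellm can translate for this model/provider
params = litellm.get_supported_openai_params(model="anthropic/claude-3-5-sonnet-20240620")
print(params)  # e.g. ["stream", "stop", "temperature", ...]
```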


@@ -76,8 +76,6 @@ Works for:
- Vertex AI models (Gemini + Anthropic)
- Bedrock Models
- Anthropic API Models
- Groq Models
- Ollama Models
<Tabs>
<TabItem value="sdk" label="SDK">


@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Embeddings
# Embedding Models
## Quick Start
```python


@@ -1,74 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Calling Finetuned Models
## OpenAI
| Model Name | Function Call |
|---------------------------|-----------------------------------------------------------------|
| fine tuned `gpt-4-0613` | `response = completion(model="ft:gpt-4-0613", messages=messages)` |
| fine tuned `gpt-4o-2024-05-13` | `response = completion(model="ft:gpt-4o-2024-05-13", messages=messages)` |
| fine tuned `gpt-3.5-turbo-0125` | `response = completion(model="ft:gpt-3.5-turbo-0125", messages=messages)` |
| fine tuned `gpt-3.5-turbo-1106` | `response = completion(model="ft:gpt-3.5-turbo-1106", messages=messages)` |
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
## Vertex AI
Fine-tuned models on Vertex AI have a numerical model/endpoint ID.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
import os
## set ENV variables
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
os.environ["VERTEXAI_LOCATION"] = "us-central1"
response = completion(
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
messages=[{ "content": "Hello, how are you?","role": "user"}],
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Add Vertex Credentials to your env
```bash
!gcloud auth application-default login
```
2. Setup config.yaml
```yaml
- model_name: finetuned-gemini
litellm_params:
model: vertex_ai/<ENDPOINT_ID>
vertex_project: <PROJECT_ID>
vertex_location: <LOCATION>
model_info:
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
```
3. Test it!
```bash
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: <LITELLM_KEY>' \
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
```
</TabItem>
</Tabs>


@@ -1,4 +1,4 @@
# Images
# Image Generation
## Quick Start


@@ -1,135 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Moderation
### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">
```python
from litellm import moderation
response = moderation(
input="hello from litellm",
model="text-moderation-stable"
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">
For the `/moderations` endpoint, there is **no need to specify `model` in the request or in the litellm config.yaml**
Start litellm proxy server
```
litellm
```
<Tabs>
<TabItem value="python" label="OpenAI Python SDK">
```python
from openai import OpenAI
# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")
response = client.moderations.create(
input="hello from litellm",
model="text-moderation-stable" # optional, defaults to `omni-moderation-latest`
)
print(response)
```
</TabItem>
<TabItem value="curl" label="Curl Request">
```shell
curl --location 'http://0.0.0.0:4000/moderations' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Input Params
LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers.
### Required Fields
- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
- If string: A string of text to classify for moderation
- If array of strings: An array of strings to classify for moderation
- If array of objects: An array of multi-modal inputs to the moderation model, where each object can be:
- An object describing an image to classify with:
- `type`: *string, required* - Always `image_url`
- `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image
- An object describing text to classify with:
- `type`: *string, required* - Always `text`
- `text`: *string, required* - A string of text to classify
### Optional Fields
- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`.
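
As a minimal sketch of the array form described above (assuming the underlying provider accepts batched inputs, as OpenAI's moderation endpoint does):

```python
from litellm import moderation

# Classify several strings in one call; `model` is optional and
# defaults to `omni-moderation-latest`
response = moderation(
    input=["hello from litellm", "another string to classify"],
)
print(response)
```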
## Output Format
Here's the exact json output and type you can expect from all moderation calls:
[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object)
```json
{
"id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
"model": "text-moderation-007",
"results": [
{
"flagged": true,
"categories": {
"sexual": false,
"hate": false,
"harassment": true,
"self-harm": false,
"sexual/minors": false,
"hate/threatening": false,
"violence/graphic": false,
"self-harm/intent": false,
"self-harm/instructions": false,
"harassment/threatening": true,
"violence": true
},
"category_scores": {
"sexual": 0.000011726012417057063,
"hate": 0.22706663608551025,
"harassment": 0.5215635299682617,
"self-harm": 2.227119921371923e-6,
"sexual/minors": 7.107352217872176e-8,
"hate/threatening": 0.023547329008579254,
"violence/graphic": 0.00003391829886822961,
"self-harm/intent": 1.646940972932498e-6,
"self-harm/instructions": 1.1198755256458526e-9,
"harassment/threatening": 0.5694745779037476,
"violence": 0.9971134662628174
}
}
]
}
```
## **Supported Providers**
| Provider |
|-------------|
| OpenAI |


@@ -4,63 +4,24 @@ import TabItem from '@theme/TabItem';
# Argilla
Argilla is a collaborative annotation tool for AI engineers and domain experts who need to build high-quality datasets for their projects.
Argilla is a tool for annotating datasets.
## Getting Started
To log the data to Argilla, first you need to deploy the Argilla server. If you have not deployed the Argilla server, please follow the instructions [here](https://docs.argilla.io/latest/getting_started/quickstart/).
Next, you will need to configure and create the Argilla dataset.
```python
import argilla as rg
client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")
settings = rg.Settings(
guidelines="These are some guidelines.",
fields=[
rg.ChatField(
name="user_input",
),
rg.TextField(
name="llm_output",
),
],
questions=[
rg.RatingQuestion(
name="rating",
values=[1, 2, 3, 4, 5, 6, 7],
),
],
)
dataset = rg.Dataset(
name="my_first_dataset",
settings=settings,
)
dataset.create()
```
For further configuration, please refer to the [Argilla documentation](https://docs.argilla.io/latest/how_to_guides/dataset/).
## Usage
## Usage
<Tabs>
<Tab value="sdk" label="SDK">
```python
import os
import litellm
from litellm import completion
import litellm
import os
# add env vars
os.environ["ARGILLA_API_KEY"]="argilla.apikey"
os.environ["ARGILLA_BASE_URL"]="http://localhost:6900"
os.environ["ARGILLA_DATASET_NAME"]="my_first_dataset"
os.environ["ARGILLA_DATASET_NAME"]="my_second_dataset"
os.environ["OPENAI_API_KEY"]="sk-proj-..."
litellm.callbacks = ["argilla"]
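# A minimal sketch of the call that typically follows (assumption: any standard
# litellm completion works here; the "argilla" callback then logs the user input
# and LLM output to the dataset named in ARGILLA_DATASET_NAME)
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello from litellm"}],
)
print(response.choices[0].message.content)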


@@ -1,18 +1,10 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Anthropic SDK
# Anthropic `/v1/messages`
Pass-through endpoints for Anthropic - call provider-specific endpoint, in native format (no translation).
Just replace `https://api.anthropic.com` with `LITELLM_PROXY_BASE_URL/anthropic`
Just replace `https://api.anthropic.com` with `LITELLM_PROXY_BASE_URL/anthropic` 🚀
#### **Example Usage**
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl --request POST \
--url http://0.0.0.0:4000/anthropic/v1/messages \
@@ -28,33 +20,6 @@ curl --request POST \
}'
```
</TabItem>
<TabItem value="python" label="Anthropic Python SDK">
```python
from anthropic import Anthropic
# Initialize client with proxy base URL
client = Anthropic(
base_url="http://0.0.0.0:4000/anthropic", # <proxy-base-url>/anthropic
api_key="sk-anything" # proxy virtual key
)
# Make a completion request
response = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1024,
messages=[
{"role": "user", "content": "Hello, world"}
]
)
print(response)
```
</TabItem>
</Tabs>
Supports **ALL** Anthropic Endpoints (including streaming).
[**See All Anthropic Endpoints**](https://docs.anthropic.com/en/api/messages)
@@ -257,14 +222,14 @@ curl https://api.anthropic.com/v1/messages/batches \
```
## Advanced
## Advanced - Use with Virtual Keys
Pre-requisites
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
Use this to avoid giving developers the raw Anthropic API key while still letting them use Anthropic endpoints.
### Use with Virtual Keys
### Usage
1. Setup environment
@@ -314,58 +279,4 @@ curl --request POST \
{"role": "user", "content": "Hello, world"}
]
}'
```
### Send `litellm_metadata` (tags)
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl --request POST \
--url http://0.0.0.0:4000/anthropic/v1/messages \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--header "Authorization: bearer sk-anything" \
--data '{
"model": "claude-3-5-sonnet-20241022",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "Hello, world"}
],
"litellm_metadata": {
"tags": ["test-tag-1", "test-tag-2"]
}
}'
```
</TabItem>
<TabItem value="python" label="Anthropic Python SDK">
```python
from anthropic import Anthropic
client = Anthropic(
base_url="http://0.0.0.0:4000/anthropic",
api_key="sk-anything"
)
response = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1024,
messages=[
{"role": "user", "content": "Hello, world"}
],
extra_body={
"litellm_metadata": {
"tags": ["test-tag-1", "test-tag-2"]
}
}
)
print(response)
```
</TabItem>
</Tabs>
```


@@ -1,21 +1,12 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Google AI Studio SDK
Pass-through endpoints for Google AI Studio - call provider-specific endpoint, in native format (no translation).
Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini`
Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini` 🚀
#### **Example Usage**
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
-H 'Content-Type: application/json' \
-d '{
"contents": [{
@@ -26,53 +17,6 @@ curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=
}'
```
</TabItem>
<TabItem value="js" label="Google AI Node.js SDK">
```javascript
const { GoogleGenerativeAI } = require("@google/generative-ai");
const modelParams = {
model: 'gemini-pro',
};
const requestOptions = {
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
};
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
const model = genAI.getGenerativeModel(modelParams, requestOptions);
async function main() {
try {
const result = await model.generateContent("Explain how AI works");
console.log(result.response.text());
} catch (error) {
console.error('Error:', error);
}
}
// For streaming responses
async function main_streaming() {
try {
const streamingResult = await model.generateContentStream("Explain how AI works");
for await (const chunk of streamingResult.stream) {
console.log('Stream chunk:', JSON.stringify(chunk));
}
const aggregatedResponse = await streamingResult.response;
console.log('Aggregated response:', JSON.stringify(aggregatedResponse));
} catch (error) {
console.error('Error:', error);
}
}
main();
// main_streaming();
```
</TabItem>
</Tabs>
Supports **ALL** Google AI Studio Endpoints (including streaming).
[**See All Google AI Studio Endpoints**](https://ai.google.dev/api)
@@ -222,14 +166,14 @@ curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5
```
## Advanced
## Advanced - Use with Virtual Keys
Pre-requisites
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
Use this to avoid giving developers the raw Google AI Studio key while still letting them use Google AI Studio endpoints.
### Use with Virtual Keys
### Usage
1. Setup environment
@@ -276,66 +220,4 @@ http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-123
}]
}]
}'
```
### Send `tags` in request headers
Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.
Pass tags in request headers as a comma-separated list. In the example below, the following tags will be tracked:
```
tags: ["gemini-js-sdk", "pass-through-endpoint"]
```
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent?key=sk-anything' \
-H 'Content-Type: application/json' \
-H 'tags: gemini-js-sdk,pass-through-endpoint' \
-d '{
"contents": [{
"parts":[{
"text": "The quick brown fox jumps over the lazy dog."
}]
}]
}'
```
</TabItem>
<TabItem value="js" label="Google AI Node.js SDK">
```javascript
const { GoogleGenerativeAI } = require("@google/generative-ai");
const modelParams = {
model: 'gemini-pro',
};
const requestOptions = {
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
customHeaders: {
"tags": "gemini-js-sdk,pass-through-endpoint"
}
};
const genAI = new GoogleGenerativeAI("sk-1234");
const model = genAI.getGenerativeModel(modelParams, requestOptions);
async function main() {
try {
const result = await model.generateContent("Explain how AI works");
console.log(result.response.text());
} catch (error) {
console.error('Error:', error);
}
}
main();
```
</TabItem>
</Tabs>
```

File diff suppressed because it is too large


@@ -10,35 +10,6 @@ LiteLLM supports all anthropic models.
- `claude-2.1`
- `claude-instant-1.2`
| Property | Details |
|-------|-------|
| Description | Claude is a highly performant, trustworthy, and intelligent AI platform built by Anthropic. Claude excels at tasks involving language, reasoning, analysis, coding, and more. |
| Provider Route on LiteLLM | `anthropic/` (add this prefix to the model name, to route any requests to Anthropic - e.g. `anthropic/claude-3-5-sonnet-20240620`) |
| Provider Doc | [Anthropic ↗](https://docs.anthropic.com/en/docs/build-with-claude/overview) |
| API Endpoint for Provider | https://api.anthropic.com |
| Supported Endpoints | `/chat/completions` |
## Supported OpenAI Parameters
Check this in code, [here](../completion/input.md#translated-openai-params)
```
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
"parallel_tool_calls",
"response_format",
"user"
```
:::info
The Anthropic API fails requests when `max_tokens` is not passed. Because of this, litellm passes `max_tokens=4096` when no `max_tokens` value is provided.
@@ -1035,3 +1006,20 @@ curl http://0.0.0.0:4000/v1/chat/completions \
</TabItem>
</Tabs>
## All Supported OpenAI Params
```
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
"parallel_tool_calls",
"response_format",
"user"
```
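
As a minimal sketch of passing a few of the params listed above through litellm (model name and values are illustrative only):

```python
from litellm import completion

# A few of the translated OpenAI params from the list above; values are examples
response = completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=256,
    temperature=0.2,
    stop=["Human:"],
)
print(response.choices[0].message.content)
```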


@@ -1,59 +0,0 @@
# File Management
## `include` external YAML files in a config.yaml
You can use `include` to include external YAML files in a config.yaml.
**Quick Start Usage:**
To include a config file, use `include` with either a single file or a list of files.
Contents of `parent_config.yaml`:
```yaml
include:
- model_config.yaml # 👈 Key change, will include the contents of model_config.yaml
litellm_settings:
callbacks: ["prometheus"]
```
Contents of `model_config.yaml`:
```yaml
model_list:
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
api_base: https://exampleopenaiendpoint-production.up.railway.app/
- model_name: fake-anthropic-endpoint
litellm_params:
model: anthropic/fake
api_base: https://exampleanthropicendpoint-production.up.railway.app/
```
Start proxy server
This will start the proxy server with config `parent_config.yaml`. Since the `include` directive is used, the server will also include the contents of `model_config.yaml`.
```
litellm --config parent_config.yaml --detailed_debug
```
## Examples using `include`
Include a single file:
```yaml
include:
- model_config.yaml
```
Include multiple files:
```yaml
include:
- model_config.yaml
- another_config.yaml
```


@@ -1,507 +0,0 @@
# All settings
```yaml
environment_variables: {}
model_list:
- model_name: string
litellm_params: {}
model_info:
id: string
mode: embedding
input_cost_per_token: 0
output_cost_per_token: 0
max_tokens: 2048
base_model: gpt-4-1106-preview
additionalProp1: {}
litellm_settings:
# Logging/Callback settings
success_callback: ["langfuse"] # list of success callbacks
failure_callback: ["sentry"] # list of failure callbacks
callbacks: ["otel"] # list of callbacks - runs on success and failure
service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks, but request metadata will still be logged.
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
# Networking settings
request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
json_logs: boolean # if true, logs will be in json format
# Fallbacks, reliability
default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad.
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors
context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors
# Caching settings
cache: true
cache_params: # set cache params for redis
type: redis # type of cache to initialize
# Optional - Redis Settings
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
port: 6379 # The port number for the Redis cache. Required if type is "redis".
password: "your_password" # The password for the Redis cache. Required if type is "redis".
namespace: "litellm.caching.caching" # namespace for redis cache
# Optional - Redis Cluster Settings
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
# Optional - Redis Sentinel Settings
service_name: "mymaster"
sentinel_nodes: [["localhost", 26379]]
# Optional - Qdrant Semantic Cache Settings
qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
qdrant_collection_name: test_collection
qdrant_quantization_config: binary
similarity_threshold: 0.8 # similarity threshold for semantic cache
# Optional - S3 Cache Settings
s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
s3_region_name: us-west-2 # AWS Region Name for S3
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 bucket
# Common Cache settings
# Optional - Supported call types for caching
supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
# /chat/completions, /completions, /embeddings, /audio/transcriptions
mode: default_off # if default_off, you need to opt in to caching on a per call basis
ttl: 600 # ttl for caching
callback_settings:
otel:
message_logging: boolean # OTEL logging callback specific settings
general_settings:
completion_model: string
disable_spend_logs: boolean # turn off writing each transaction to the db
disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
disable_reset_budget: boolean # turn off reset budget scheduled task
disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
key_management_system: google_kms # either google_kms or azure_kms
master_key: string
# Database Settings
database_url: string
database_connection_pool_limit: 0 # default 100
database_connection_timeout: 0 # default 60s
allow_requests_on_db_unavailable: boolean # if true, will allow requests that can not connect to the DB to verify Virtual Key to still work
custom_auth: string
max_parallel_requests: 0 # the max parallel requests allowed per deployment
global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
infer_model_from_keys: true
background_health_checks: true
health_check_interval: 300
alerting: ["slack", "email"]
alerting_threshold: 0
use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints
```
### litellm_settings - Reference
| Name | Type | Description |
|------|------|-------------|
| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. datadog, prometheus) [Doc Metrics](prometheus) |
| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) |
| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider |
| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.|
| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) |
| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) |
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
| cache | boolean | If true, enables caching. [Further docs](./caching) |
| cache_params | object | Parameters for the cache. [Further docs](./caching) |
| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Further docs](./caching) |
| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". |
| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". |
| cache_params.password | string | The password for the Redis cache. Required if type is "redis". |
| cache_params.namespace | string | The namespace for the Redis cache. |
| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) |
| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) |
| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) |
| cache_params.ttl | integer | The time (in seconds) to store entries in cache. |
| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. |
| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. |
| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. |
| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. |
| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the semantic cache. |
| cache_params.s3_region_name | string | The region name for the S3 bucket. |
| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. |
| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. |
| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. |
| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) |
| cache_params.mode | string | The mode of the cache. [Further docs](./caching) |
| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
### general_settings - Reference
| Name | Type | Description |
|------|------|-------------|
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) |
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached |
| disable_reset_budget | boolean | If true, turns off reset budget scheduled task |
| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db |
| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) |
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)|
| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)|
| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) |
| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) |
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
| infer_model_from_keys | boolean | If true, infers the model from the provided keys |
| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) |
| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) |
| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) |
| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) |
| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) |
| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). [Doc on health checks](health) |
| public_routes | List[str] | (Enterprise Feature) Control list of public routes |
| alert_types | List[str] | Control list of alert types to send to slack [Doc on alert types](./alerting.md) |
| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys [Doc on service accounts](./service_accounts.md) |
| image_generation_model | str | The default model to use for image generation - ignores model set in request |
| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) |
| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. |
| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. |
| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** |
| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** |
| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** |
| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) |
| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) |
| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. |
| embedding_model | str | The default model to use for embeddings - ignores model set in request |
| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). |
| alert_to_webhook_url | Dict[str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) |
| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) |
| allow_user_auth | boolean | (Deprecated) old approach for user authentication. |
| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. |
| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB |
| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) |
| moderation_model | str | The default model to use for moderation. |
| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) |
| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) |
| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. [Doc on admin only routes](./enterprise#control-available-public-private-routes) |
| use_azure_key_vault | boolean | If true, load keys from azure key vault |
| use_google_kms | boolean | If true, load keys from google kms |
| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) |
| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) |
| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) |
| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) |
| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
### router_settings - Reference
:::info
Most values can also be set via `litellm_settings`. If you see overlapping values, settings on `router_settings` will override those on `litellm_settings`.
:::
```yaml
router_settings:
routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
redis_host: <your-redis-host> # string
redis_password: <your-redis-password> # string
redis_port: <your-redis-port> # string
enable_pre_call_check: true # bool - Before call is made check if a call is within model context window
allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
disable_cooldowns: True # bool - Disable cooldowns for all models
enable_tag_filtering: True # bool - Use tag based routing for requests
retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
"AuthenticationErrorRetries": 3,
"TimeoutErrorRetries": 3,
"RateLimitErrorRetries": 3,
"ContentPolicyViolationErrorRetries": 4,
"InternalServerErrorRetries": 4
}
allowed_fails_policy: {
"BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
"AuthenticationErrorAllowedFails": 10, # int
"TimeoutErrorAllowedFails": 12, # int
"RateLimitErrorAllowedFails": 10000, # int
"ContentPolicyViolationErrorAllowedFails": 15, # int
"InternalServerErrorAllowedFails": 20, # int
}
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
```
| Name | Type | Description |
|------|------|-------------|
| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) |
| redis_host | string | The host address for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**|
| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) |
| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) |
| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) |
| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. |
| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) |
| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) |
| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) |
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) |
| default_max_parallel_requests | Optional[int] | The default maximum number of parallel requests for a deployment. |
| default_priority | (Optional[int]) | The default priority for a request. Only for '.scheduler_acompletion()'. Default is None. |
| polling_interval | (Optional[float]) | frequency of polling queue. Only for '.scheduler_acompletion()'. Default is 3ms. |
| max_fallbacks | Optional[int] | The maximum number of fallbacks to try before exiting the call. Defaults to 5. |
| default_litellm_params | Optional[dict] | The default litellm parameters to add to all requests (e.g. `temperature`, `max_tokens`). |
| timeout | Optional[float] | The default timeout for a request. |
| debug_level | Literal["DEBUG", "INFO"] | The debug level for the logging library in the router. Defaults to "INFO". |
| client_ttl | int | Time-to-live for cached clients in seconds. Defaults to 3600. |
| cache_kwargs | dict | Additional keyword arguments for the cache initialization. |
| routing_strategy_args | dict | Additional keyword arguments for the routing strategy - e.g. lowest latency routing default ttl |
| model_group_alias | dict | Model group alias mapping. E.g. `{"claude-3-haiku": "claude-3-haiku-20240229"}` |
| num_retries | int | Number of retries for a request. Defaults to 3. |
| default_fallbacks | Optional[List[str]] | Fallbacks to try if no model group-specific fallbacks are defined. |
| caching_groups | Optional[List[tuple]] | List of model groups for caching across model groups. Defaults to None. - e.g. caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")]|
| alerting_config | AlertingConfig | [SDK-only arg] Slack alerting configuration. Defaults to None. [Further Docs](../routing.md#alerting-) |
| assistants_config | AssistantsConfig | Set on proxy via `assistant_settings`. [Further docs](../assistants.md) |
| set_verbose | boolean | [DEPRECATED PARAM - see debug docs](./debugging.md) If true, sets the logging level to verbose. |
| retry_after | int | Time to wait before retrying a request in seconds. Defaults to 0. If `x-retry-after` is received from LLM API, this value is overridden. |
| provider_budget_config | ProviderBudgetConfig | Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None. [Further Docs](./provider_budget_routing.md) |
| enable_pre_call_checks | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
| model_group_retry_policy | Dict[str, RetryPolicy] | [SDK-only arg] Set retry policy for model groups. |
| context_window_fallbacks | List[Dict[str, List[str]]] | Fallback models for context window violations. |
| redis_url | str | URL for Redis server. **Known performance issue with Redis URL.** |
| cache_responses | boolean | Flag to enable caching LLM Responses, if cache set under `router_settings`. If true, caches responses. Defaults to False. |
| router_general_settings | RouterGeneralSettings | [SDK-Only] Router general settings - contains optimizations like 'async_only_mode'. [Docs](../routing.md#router-general-settings) |
### environment variables - Reference
| Name | Description |
|------|-------------|
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions
| AISPEND_ACCOUNT_ID | Account ID for AI Spend
| AISPEND_API_KEY | API Key for AI Spend
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access
| ARIZE_API_KEY | API key for Arize platform integration
| ARIZE_SPACE_KEY | Space key for Arize platform
| ARGILLA_BATCH_SIZE | Batch size for Argilla logging
| ARGILLA_API_KEY | API key for Argilla platform
| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging
| ARGILLA_DATASET_NAME | Dataset name for Argilla logging
| ARGILLA_BASE_URL | Base URL for Argilla service
| ATHINA_API_KEY | API key for Athina service
| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key)
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
| AWS_PROFILE_NAME | AWS CLI profile name to be used
| AWS_REGION_NAME | Default AWS region for service interactions
| AWS_ROLE_NAME | Role name for AWS IAM usage
| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services
| AWS_SESSION_NAME | Name for AWS session
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
| AZURE_API_VERSION | Version of the Azure API being used
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CLIENT_ID | Client ID for Azure services
| AZURE_CLIENT_SECRET | Client secret for Azure services
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI
| CONFIG_FILE_PATH | File path for configuration file
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache
| DATABASE_HOST | Hostname for the database server
| DATABASE_NAME | Name of the database
| DATABASE_PASSWORD | Password for the database user
| DATABASE_PORT | Port number for database connection
| DATABASE_SCHEMA | Schema name used in the database
| DATABASE_URL | Connection URL for the database
| DATABASE_USER | Username for database connection
| DATABASE_USERNAME | Alias for database user
| DATABRICKS_API_BASE | Base URL for Databricks API
| DD_BASE_URL | Base URL for Datadog integration
| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
| DD_API_KEY | API key for Datadog integration
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
| DD_SOURCE | Source identifier for Datadog logs
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
| DD_SERVICE | Service identifier for Datadog logs. Defaults to "litellm-server"
| DD_VERSION | Version identifier for Datadog logs. Defaults to "unknown"
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
| DIRECT_URL | Direct URL for service endpoint
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates
| DOCS_DESCRIPTION | Description text for documentation pages
| DOCS_FILTERED | Flag indicating filtered documentation
| DOCS_TITLE | Title of the documentation pages
| DOCS_URL | The path to the Swagger API documentation. **By default this is "/"**
| EMAIL_SUPPORT_CONTACT | Support contact email address
| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket
| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file
| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds**
| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. **Default is 2048**
| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers
| GENERIC_CLIENT_ID | Client ID for generic OAuth providers
| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers
| GENERIC_CLIENT_STATE | State parameter for generic client authentication
| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth
| GENERIC_SCOPE | Scope settings for generic OAuth providers
| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers
| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth
| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth
| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth
| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth
| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth
| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider
| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role
| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth
| GALILEO_BASE_URL | Base URL for Galileo platform
| GALILEO_PASSWORD | Password for Galileo authentication
| GALILEO_PROJECT_ID | Project ID for Galileo usage
| GALILEO_USERNAME | Username for Galileo authentication
| GREENSCALE_API_KEY | API key for Greenscale service
| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service
| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file
| GOOGLE_CLIENT_ID | Client ID for Google OAuth
| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth
| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS
| HF_API_BASE | Base URL for Hugging Face API
| HELICONE_API_KEY | API key for Helicone service
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
| JSON_LOGS | Enable JSON formatted logging
| JWT_AUDIENCE | Expected audience for JWT tokens
| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification
| LAGO_API_BASE | Base URL for Lago API
| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago
| LAGO_API_EVENT_CODE | Event code for Lago API events
| LAGO_API_KEY | API key for accessing Lago services
| LANGFUSE_DEBUG | Toggle debug mode for Langfuse
| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs
| LANGFUSE_HOST | Host URL for Langfuse service
| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication
| LANGFUSE_RELEASE | Release version of Langfuse integration
| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication
| LANGSMITH_API_KEY | API key for Langsmith platform
| LANGSMITH_BASE_URL | Base URL for Langsmith service
| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith
| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run
| LANGSMITH_PROJECT | Project name for Langsmith integration
| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging
| LANGTRACE_API_KEY | API key for Langtrace service
| LITERAL_API_KEY | API key for Literal integration
| LITERAL_API_URL | API URL for Literal service
| LITERAL_BATCH_SIZE | Batch size for Literal operations
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
| LITELLM_EMAIL | Email associated with LiteLLM account
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
| LITELLM_LICENSE | License key for LiteLLM usage
| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM
| LITELLM_LOG | Enable detailed logging for LiteLLM
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development)
| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM
| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM
| LITELLM_TOKEN | Access token for LiteLLM integration
| LOGFIRE_TOKEN | Token for Logfire logging service
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
| NO_DOCS | Flag to disable documentation generation
| NO_PROXY | List of addresses to bypass proxy
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
| OPENAI_API_BASE | Base URL for OpenAI API
| OPENAI_API_KEY | API key for OpenAI services
| OPENAI_ORGANIZATION | Organization identifier for OpenAI
| OPENID_BASE_URL | Base URL for OpenID Connect services
| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication
| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication
| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration
| OPENMETER_API_KEY | API key for OpenMeter services
| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter
| OTEL_ENDPOINT | OpenTelemetry endpoint for traces
| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry
| OTEL_EXPORTER | Exporter type for OpenTelemetry
| OTEL_HEADERS | Headers for OpenTelemetry requests
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
| PREDIBASE_API_BASE | Base URL for Predibase API
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
| PROMETHEUS_URL | URL for Prometheus service
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
| PROXY_ADMIN_ID | Admin identifier for proxy server
| PROXY_BASE_URL | Base URL for proxy service
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
| PROXY_MASTER_KEY | Master key for proxy authentication
| QDRANT_API_BASE | Base URL for Qdrant API
| QDRANT_API_KEY | API key for Qdrant service
| QDRANT_URL | Connection URL for Qdrant database
| REDIS_HOST | Hostname for Redis server
| REDIS_PASSWORD | Password for Redis service
| REDIS_PORT | Port number for Redis server
| REDOC_URL | The path to the Redoc Fast API documentation. **By default this is "/redoc"**
| SERVER_ROOT_PATH | Root path for the server application
| SET_VERBOSE | Flag to enable verbose logging
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
| SMTP_HOST | Hostname for the SMTP server
| SMTP_PASSWORD | Password for SMTP authentication
| SMTP_PORT | Port number for SMTP server
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
| SMTP_USERNAME | Username for SMTP authentication
| SPEND_LOGS_URL | URL for retrieving spend logs
| SSL_CERTIFICATE | Path to the SSL certificate file
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
| SUPABASE_KEY | API key for Supabase service
| SUPABASE_URL | Base URL for Supabase instance
| TEST_EMAIL_ADDRESS | Email address used for testing purposes
| UI_LOGO_PATH | Path to the logo image used in the UI
| UI_PASSWORD | Password for accessing the UI
| UI_USERNAME | Username for accessing the UI
| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse
| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service
| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
| WEBHOOK_URL | URL for receiving webhooks from external services

View file

@ -2,7 +2,7 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Overview
# Proxy Config.yaml
Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`master-key`) on the config.yaml.
| Param Name | Description |
@ -357,6 +357,77 @@ curl --location 'http://0.0.0.0:4000/v1/model/info' \
--data ''
```
### Provider specific wildcard routing
**Proxy all models from a provider**
Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**
**Step 1** - define provider specific routing on config.yaml
```yaml
model_list:
# provider specific wildcard routing
- model_name: "anthropic/*"
litellm_params:
model: "anthropic/*"
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: "groq/*"
litellm_params:
model: "groq/*"
api_key: os.environ/GROQ_API_KEY
- model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
litellm_params:
model: "openai/fo::*:static::*"
api_key: os.environ/OPENAI_API_KEY
```
**Step 2** - Run litellm proxy
```shell
$ litellm --config /path/to/config.yaml
```
**Step 3** - Test it
Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
```shell
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "anthropic/claude-3-sonnet-20240229",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
```shell
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "groq/llama3-8b-8192",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
Test with `fo::*:static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
```shell
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "fo::hi::static::hi",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
### Load Balancing
:::info
@ -526,6 +597,479 @@ general_settings:
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
```
## **All settings**
```yaml
environment_variables: {}
model_list:
- model_name: string
litellm_params: {}
model_info:
id: string
mode: embedding
input_cost_per_token: 0
output_cost_per_token: 0
max_tokens: 2048
base_model: gpt-4-1106-preview
additionalProp1: {}
litellm_settings:
# Logging/Callback settings
success_callback: ["langfuse"] # list of success callbacks
failure_callback: ["sentry"] # list of failure callbacks
callbacks: ["otel"] # list of callbacks - runs on success and failure
service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks; request metadata will still be logged.
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.) from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
# Networking settings
request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
json_logs: boolean # if true, logs will be in json format
# Fallbacks, reliability
default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad.
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors
context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors
# Caching settings
cache: true
cache_params: # set cache params for redis
type: redis # type of cache to initialize
# Optional - Redis Settings
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
port: 6379 # The port number for the Redis cache. Required if type is "redis".
password: "your_password" # The password for the Redis cache. Required if type is "redis".
namespace: "litellm.caching.caching" # namespace for redis cache
# Optional - Redis Cluster Settings
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
# Optional - Redis Sentinel Settings
service_name: "mymaster"
sentinel_nodes: [["localhost", 26379]]
# Optional - Qdrant Semantic Cache Settings
qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
qdrant_collection_name: test_collection
qdrant_quantization_config: binary
similarity_threshold: 0.8 # similarity threshold for semantic cache
# Optional - S3 Cache Settings
s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
s3_region_name: us-west-2 # AWS Region Name for S3
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is the AWS Access Key ID for S3
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use a Backblaze/Cloudflare S3-compatible bucket
# Common Cache settings
# Optional - Supported call types for caching
supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
# /chat/completions, /completions, /embeddings, /audio/transcriptions
mode: default_off # if default_off, you need to opt in to caching on a per call basis
ttl: 600 # ttl for caching
callback_settings:
otel:
message_logging: boolean # OTEL logging callback specific settings
general_settings:
completion_model: string
disable_spend_logs: boolean # turn off writing each transaction to the db
disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
disable_reset_budget: boolean # turn off reset budget scheduled task
disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
key_management_system: google_kms # either google_kms or azure_kms
master_key: string
# Database Settings
database_url: string
database_connection_pool_limit: 0 # default 100
database_connection_timeout: 0 # default 60s
allow_requests_on_db_unavailable: boolean # if true, allows requests to still succeed when the proxy cannot connect to the DB to verify the Virtual Key
custom_auth: string
max_parallel_requests: 0 # the max parallel requests allowed per deployment
global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
infer_model_from_keys: true
background_health_checks: true
health_check_interval: 300
alerting: ["slack", "email"]
alerting_threshold: 0
use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints
```
### litellm_settings - Reference
| Name | Type | Description |
|------|------|-------------|
| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. datadog, prometheus) [Doc Metrics](prometheus) |
| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) |
| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider |
| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.|
| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) |
| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) |
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
| json_logs | boolean | If true, logs will be in JSON format (equivalent to setting `litellm.json_logs = True`). Currently, only the raw POST request from litellm is logged as JSON [Further docs](./debugging) |
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
| cache | boolean | If true, enables caching. [Further docs](./caching) |
| cache_params | object | Parameters for the cache. [Further docs](./caching) |
| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Further docs](./caching) |
| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". |
| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". |
| cache_params.password | string | The password for the Redis cache. Required if type is "redis". |
| cache_params.namespace | string | The namespace for the Redis cache. |
| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) |
| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) |
| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) |
| cache_params.ttl | integer | The time (in seconds) to store entries in cache. |
| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. |
| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. |
| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. |
| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. |
| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the semantic cache. |
| cache_params.s3_region_name | string | The region name for the S3 bucket. |
| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. |
| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. |
| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. |
| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) |
| cache_params.mode | string | The mode of the cache. [Further docs](./caching) |
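Pulling a few of these options together, here is a minimal sketch of a `litellm_settings` block (the callback names, fallback model, and Redis values are illustrative placeholders, not a recommended production setup):
```yaml
litellm_settings:
  success_callback: ["langfuse"]        # log successful calls to Langfuse
  failure_callback: ["sentry"]          # log failed calls to Sentry
  turn_off_message_logging: true        # keep request metadata, drop prompts/responses
  request_timeout: 600                  # seconds before raising a Timeout error
  default_fallbacks: ["claude-opus"]    # used if a model group is misconfigured / down
  cache: true
  cache_params:
    type: redis
    host: "localhost"
    port: 6379
    ttl: 600                            # cache entries expire after 10 minutes
```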
### general_settings - Reference
| Name | Type | Description |
|------|------|-------------|
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) |
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached |
| disable_reset_budget | boolean | If true, turns off reset budget scheduled task |
| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db |
| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) |
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)|
| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)|
| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) |
| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) |
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
| infer_model_from_keys | boolean | If true, infers the model from the provided keys |
| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) |
| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) |
| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) |
| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) |
| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) |
| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). [Doc on health checks](health) |
| public_routes | List[str] | (Enterprise Feature) Control list of public routes |
| alert_types | List[str] | Control list of alert types to send to slack [Doc on alert types](./alerting.md) |
| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys [Doc on service accounts](./service_accounts.md) |
| image_generation_model | str | The default model to use for image generation - ignores model set in request |
| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) |
| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. |
| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. |
| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** |
| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** |
| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** |
| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) |
| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) |
| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. |
| embedding_model | str | The default model to use for embeddings - ignores model set in request |
| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). |
| alert_to_webhook_url | Dict[str, str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) |
| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) |
| allow_user_auth | boolean | (Deprecated) old approach for user authentication. |
| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. |
| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB |
| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) |
| moderation_model | str | The default model to use for moderation. |
| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) |
| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) |
| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. [Doc on admin only routes](./enterprise#control-available-public-private-routes) |
| use_azure_key_vault | boolean | If true, load keys from azure key vault |
| use_google_kms | boolean | If true, load keys from google kms |
| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) |
| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) |
| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) |
| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) |
| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
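As with `litellm_settings`, here is a short, hedged sketch of a `general_settings` block (the key, URL, and thresholds below are placeholders):
```yaml
general_settings:
  master_key: sk-1234                        # proxy admin key (placeholder)
  database_url: os.environ/DATABASE_URL      # read the connection string from the environment
  disable_spend_logs: true                   # skip writing each transaction to the DB
  allow_requests_on_db_unavailable: true     # only if the proxy runs inside your VPC
  background_health_checks: true
  health_check_interval: 300                 # seconds between health checks
  alerting: ["slack"]
  alerting_threshold: 300                    # threshold (in seconds) for triggering alerts
```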
### router_settings - Reference
```yaml
router_settings:
routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
redis_host: <your-redis-host> # string
redis_password: <your-redis-password> # string
redis_port: <your-redis-port> # string
enable_pre_call_check: true # bool - Before call is made check if a call is within model context window
allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
disable_cooldowns: True # bool - Disable cooldowns for all models
enable_tag_filtering: True # bool - Use tag based routing for requests
retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
"AuthenticationErrorRetries": 3,
"TimeoutErrorRetries": 3,
"RateLimitErrorRetries": 3,
"ContentPolicyViolationErrorRetries": 4,
"InternalServerErrorRetries": 4
}
allowed_fails_policy: {
"BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
"AuthenticationErrorAllowedFails": 10, # int
"TimeoutErrorAllowedFails": 12, # int
"RateLimitErrorAllowedFails": 10000, # int
"ContentPolicyViolationErrorAllowedFails": 15, # int
"InternalServerErrorAllowedFails": 20, # int
}
content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
```
| Name | Type | Description |
|------|------|-------------|
| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) |
| redis_host | string | The host address for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**|
| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) |
| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) |
| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) |
| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. |
| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) |
| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) |
| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) |
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) |
### environment variables - Reference
| Name | Description |
|------|-------------|
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions
| AISPEND_ACCOUNT_ID | Account ID for AI Spend
| AISPEND_API_KEY | API Key for AI Spend
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access
| ARIZE_API_KEY | API key for Arize platform integration
| ARIZE_SPACE_KEY | Space key for Arize platform
| ARGILLA_BATCH_SIZE | Batch size for Argilla logging
| ARGILLA_API_KEY | API key for Argilla platform
| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging
| ARGILLA_DATASET_NAME | Dataset name for Argilla logging
| ARGILLA_BASE_URL | Base URL for Argilla service
| ATHINA_API_KEY | API key for Athina service
| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key)
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
| AWS_PROFILE_NAME | AWS CLI profile name to be used
| AWS_REGION_NAME | Default AWS region for service interactions
| AWS_ROLE_NAME | Role name for AWS IAM usage
| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services
| AWS_SESSION_NAME | Name for AWS session
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
| AZURE_API_VERSION | Version of the Azure API being used
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CLIENT_ID | Client ID for Azure services
| AZURE_CLIENT_SECRET | Client secret for Azure services
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI
| CONFIG_FILE_PATH | File path for configuration file
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache
| DATABASE_HOST | Hostname for the database server
| DATABASE_NAME | Name of the database
| DATABASE_PASSWORD | Password for the database user
| DATABASE_PORT | Port number for database connection
| DATABASE_SCHEMA | Schema name used in the database
| DATABASE_URL | Connection URL for the database
| DATABASE_USER | Username for database connection
| DATABASE_USERNAME | Alias for database user
| DATABRICKS_API_BASE | Base URL for Databricks API
| DD_BASE_URL | Base URL for Datadog integration
| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
| DD_API_KEY | API key for Datadog integration
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
| DD_SOURCE | Source identifier for Datadog logs
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
| DIRECT_URL | Direct URL for service endpoint
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates
| DOCS_DESCRIPTION | Description text for documentation pages
| DOCS_FILTERED | Flag indicating filtered documentation
| DOCS_TITLE | Title of the documentation pages
| DOCS_URL | The path to the Swagger API documentation. **By default this is "/"**
| EMAIL_SUPPORT_CONTACT | Support contact email address
| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket
| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file
| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds**
| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. **Default is 2048**
| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers
| GENERIC_CLIENT_ID | Client ID for generic OAuth providers
| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers
| GENERIC_CLIENT_STATE | State parameter for generic client authentication
| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth
| GENERIC_SCOPE | Scope settings for generic OAuth providers
| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers
| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth
| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth
| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth
| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth
| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth
| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider
| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role
| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth
| GALILEO_BASE_URL | Base URL for Galileo platform
| GALILEO_PASSWORD | Password for Galileo authentication
| GALILEO_PROJECT_ID | Project ID for Galileo usage
| GALILEO_USERNAME | Username for Galileo authentication
| GREENSCALE_API_KEY | API key for Greenscale service
| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service
| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file
| GOOGLE_CLIENT_ID | Client ID for Google OAuth
| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth
| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS
| HF_API_BASE | Base URL for Hugging Face API
| HELICONE_API_KEY | API key for Helicone service
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
| JSON_LOGS | Enable JSON formatted logging
| JWT_AUDIENCE | Expected audience for JWT tokens
| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification
| LAGO_API_BASE | Base URL for Lago API
| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago
| LAGO_API_EVENT_CODE | Event code for Lago API events
| LAGO_API_KEY | API key for accessing Lago services
| LANGFUSE_DEBUG | Toggle debug mode for Langfuse
| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs
| LANGFUSE_HOST | Host URL for Langfuse service
| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication
| LANGFUSE_RELEASE | Release version of Langfuse integration
| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication
| LANGSMITH_API_KEY | API key for Langsmith platform
| LANGSMITH_BASE_URL | Base URL for Langsmith service
| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith
| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run
| LANGSMITH_PROJECT | Project name for Langsmith integration
| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging
| LANGTRACE_API_KEY | API key for Langtrace service
| LITERAL_API_KEY | API key for Literal integration
| LITERAL_API_URL | API URL for Literal service
| LITERAL_BATCH_SIZE | Batch size for Literal operations
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
| LITELLM_EMAIL | Email associated with LiteLLM account
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
| LITELLM_LICENSE | License key for LiteLLM usage
| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM
| LITELLM_LOG | Enable detailed logging for LiteLLM
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development)
| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM
| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM
| LITELLM_TOKEN | Access token for LiteLLM integration
| LOGFIRE_TOKEN | Token for Logfire logging service
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
| NO_DOCS | Flag to disable documentation generation
| NO_PROXY | List of addresses to bypass proxy
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
| OPENAI_API_BASE | Base URL for OpenAI API
| OPENAI_API_KEY | API key for OpenAI services
| OPENAI_ORGANIZATION | Organization identifier for OpenAI
| OPENID_BASE_URL | Base URL for OpenID Connect services
| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication
| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication
| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration
| OPENMETER_API_KEY | API key for OpenMeter services
| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter
| OTEL_ENDPOINT | OpenTelemetry endpoint for traces
| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry
| OTEL_EXPORTER | Exporter type for OpenTelemetry
| OTEL_HEADERS | Headers for OpenTelemetry requests
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
| PREDIBASE_API_BASE | Base URL for Predibase API
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
| PROMETHEUS_URL | URL for Prometheus service
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
| PROXY_ADMIN_ID | Admin identifier for proxy server
| PROXY_BASE_URL | Base URL for proxy service
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
| PROXY_MASTER_KEY | Master key for proxy authentication
| QDRANT_API_BASE | Base URL for Qdrant API
| QDRANT_API_KEY | API key for Qdrant service
| QDRANT_URL | Connection URL for Qdrant database
| REDIS_HOST | Hostname for Redis server
| REDIS_PASSWORD | Password for Redis service
| REDIS_PORT | Port number for Redis server
| REDOC_URL | The path to the Redoc Fast API documentation. **By default this is "/redoc"**
| SERVER_ROOT_PATH | Root path for the server application
| SET_VERBOSE | Flag to enable verbose logging
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
| SMTP_HOST | Hostname for the SMTP server
| SMTP_PASSWORD | Password for SMTP authentication
| SMTP_PORT | Port number for SMTP server
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
| SMTP_USERNAME | Username for SMTP authentication
| SPEND_LOGS_URL | URL for retrieving spend logs
| SSL_CERTIFICATE | Path to the SSL certificate file
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
| SUPABASE_KEY | API key for Supabase service
| SUPABASE_URL | Base URL for Supabase instance
| TEST_EMAIL_ADDRESS | Email address used for testing purposes
| UI_LOGO_PATH | Path to the logo image used in the UI
| UI_PASSWORD | Password for accessing the UI
| UI_USERNAME | Username for accessing the UI
| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse
| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service
| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
| WEBHOOK_URL | URL for receiving webhooks from external services
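Most of these are read straight from the process environment, but the proxy `config.yaml` also accepts an `environment_variables` block (shown empty in the "All settings" example above). A hedged sketch with placeholder values:
```yaml
environment_variables:
  REDIS_HOST: "my-redis.internal"      # placeholder hostname
  REDIS_PORT: "6379"
  LANGFUSE_PUBLIC_KEY: "pk-lf-..."     # placeholder key
  LANGFUSE_SECRET_KEY: "sk-lf-..."     # placeholder key
```
The same variables can equally be exported in the shell or set on the container before starting the proxy.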
## Extras

View file

@ -50,22 +50,18 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma
| LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** |
| LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** |
## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs`
## How to Disable `LiteLLM_SpendLogs`
You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
You can disable spend_logs by setting `disable_spend_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
```yaml
general_settings:
disable_spend_logs: True # Disable writing spend logs to DB
disable_error_logs: True # Disable writing error logs to DB
disable_spend_logs: True
```
### What is the impact of disabling these logs?
When disabling spend logs (`disable_spend_logs: True`):
### What is the impact of disabling `LiteLLM_SpendLogs`?
- You **will not** be able to view Usage on the LiteLLM UI
- You **will** continue seeing cost metrics on s3, Prometheus, Langfuse (any other Logging integration you are using)
When disabling error logs (`disable_error_logs: True`):
- You **will not** be able to view Errors on the LiteLLM UI
- You **will** continue seeing error logs in your application logs and any other logging integrations you are using

View file

@ -1,4 +1,4 @@
# Proxy - Load Balancing
# Multiple Instances
Load balance multiple instances of the same model
The proxy will handle routing requests (using LiteLLM's Router). **Set `rpm` in the config if you want to maximize throughput**
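As a rough sketch (the deployment names and `rpm` values are illustrative), two entries that share one `model_name` are load balanced by the router, and `rpm` tells it how much traffic each deployment can take:
```yaml
model_list:
  - model_name: gpt-3.5-turbo              # same public name for both deployments
    litellm_params:
      model: azure/my-azure-gpt-35         # illustrative Azure deployment name
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      rpm: 600                             # this deployment can serve ~600 requests/min
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
      rpm: 300
```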

View file

@ -23,7 +23,6 @@ general_settings:
# OPTIONAL Best Practices
disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this if you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus
disable_error_logs: True # turn off writing LLM Exceptions to DB
allow_requests_on_db_unavailable: True # Only USE when running LiteLLM in your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.
litellm_settings:
@ -103,22 +102,17 @@ general_settings:
allow_requests_on_db_unavailable: True
```
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
## 6. Disable spend_logs if you're not using the LiteLLM UI
By default, LiteLLM writes several types of logs to the database:
- Every LLM API request to the `LiteLLM_SpendLogs` table
- LLM Exceptions to the `LiteLLM_LogsErrors` table
By default LiteLLM will write every request to the `LiteLLM_SpendLogs` table. This is used for viewing Usage on the LiteLLM UI.
If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`:
If you're not viewing Usage on the LiteLLM UI (most users rely on Prometheus for metrics instead), you can disable spend_logs by setting `disable_spend_logs` to `True`.
```yaml
general_settings:
disable_spend_logs: True # Disable writing spend logs to DB
disable_error_logs: True # Disable writing error logs to DB
disable_spend_logs: True
```
[More information about what the Database is used for here](db_info)
## 7. Use Helm PreSync Hook for Database Migrations [BETA]
To ensure only one service manages database migrations, use our [Helm PreSync hook for Database Migrations](https://github.com/BerriAI/litellm/blob/main/deploy/charts/litellm-helm/templates/migrations-job.yaml). This ensures migrations are handled during `helm upgrade` or `helm install`, while LiteLLM pods explicitly disable migrations.
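One way to wire this up (the value keys below are assumptions about your Helm values file, not a documented schema) is to let the PreSync hook own migrations while the LiteLLM pods opt out of schema updates via the `DISABLE_SCHEMA_UPDATE` environment variable listed in the reference above:
```yaml
# values.yaml sketch - adapt the key names to your chart version
migrationJob:
  enabled: true                  # hypothetical: PreSync hook runs migrations on helm install/upgrade
environmentVariables:
  DISABLE_SCHEMA_UPDATE: "True"  # LiteLLM pods themselves never attempt schema updates
```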

View file

@ -192,13 +192,3 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Das
|----------------------|--------------------------------------|
| `litellm_llm_api_failed_requests_metric` | **deprecated** use `litellm_proxy_failed_requests_metric` |
| `litellm_requests_metric` | **deprecated** use `litellm_proxy_total_requests_metric` |
## FAQ
### What are `_created` vs. `_total` metrics?
- `_created` metrics are metrics that are created when the proxy starts
- `_total` metrics are metrics that are incremented for each request
You should consume the `_total` metrics for your counting purposes

View file

@ -16,27 +16,25 @@ model_list:
api_key: os.environ/OPENAI_API_KEY
router_settings:
redis_host: <your-redis-host>
redis_password: <your-redis-password>
redis_port: <your-redis-port>
provider_budget_config:
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
azure:
budget_limit: 100
time_period: 1d
anthropic:
budget_limit: 100
time_period: 10d
vertex_ai:
budget_limit: 100
time_period: 12d
gemini:
budget_limit: 100
time_period: 12d
# OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d
azure:
budget_limit: 100
time_period: 1d
anthropic:
budget_limit: 100
time_period: 10d
vertexai:
budget_limit: 100
time_period: 12d
gemini:
budget_limit: 100
time_period: 12d
general_settings:
master_key: sk-1234
@ -114,11 +112,8 @@ Expected response on failure
- If all providers exceed budget, raises an error
3. **Supported Time Periods**:
- Seconds: "Xs" (e.g., "30s")
- Minutes: "Xm" (e.g., "10m")
- Hours: "Xh" (e.g., "24h")
- Days: "Xd" (e.g., "1d", "30d")
- Months: "Xmo" (e.g., "1mo", "2mo")
- Format: "Xd" where X is number of days
- Examples: "1d" (1 day), "30d" (30 days)
4. **Requirements**:
- Redis required for tracking spend across instances
@ -134,31 +129,6 @@ This metric indicates the remaining budget for a provider in dollars (USD)
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
```
## Multi-instance setup
If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
```yaml
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: openai/gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
router_settings:
provider_budget_config:
openai:
budget_limit: 0.000000000001 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
# 👇 Add this: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD
general_settings:
master_key: sk-1234
```
## Spec for provider_budget_config
@ -166,12 +136,7 @@ The `provider_budget_config` is a dictionary where:
- **Key**: Provider name (string) - Must be a valid [LiteLLM provider name](https://docs.litellm.ai/docs/providers)
- **Value**: Budget configuration object with the following parameters:
- `budget_limit`: Float value representing the budget in USD
- `time_period`: Duration string in one of the following formats:
- Seconds: `"Xs"` (e.g., "30s")
- Minutes: `"Xm"` (e.g., "10m")
- Hours: `"Xh"` (e.g., "24h")
- Days: `"Xd"` (e.g., "1d", "30d")
- Months: `"Xmo"` (e.g., "1mo", "2mo")
- `time_period`: String in the format "Xd" where X is the number of days (e.g., "1d", "30d")
Example structure:
```yaml
@ -182,10 +147,4 @@ provider_budget_config:
azure:
budget_limit: 500.0 # $500 USD
time_period: "30d" # 30 day period
anthropic:
budget_limit: 200.0 # $200 USD
time_period: "1mo" # 1 month period
gemini:
budget_limit: 50.0 # $50 USD
time_period: "24h" # 24 hour period
```

View file

@ -2,7 +2,7 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Proxy - Fallbacks, Retries
# Fallbacks, Load Balancing, Retries
- Quick Start [load balancing](#test---load-balancing)
- Quick Start [client side fallbacks](#test---client-side-fallbacks)

View file

@ -217,10 +217,4 @@ litellm_settings:
max_parallel_requests: 1000 # (Optional[int], optional): Max number of requests that can be made in parallel. Defaults to None.
tpm_limit: 1000 #(Optional[int], optional): Tpm limit. Defaults to None.
rpm_limit: 1000 #(Optional[int], optional): Rpm limit. Defaults to None.
key_generation_settings: # Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation)
team_key_generation:
allowed_team_member_roles: ["admin"]
personal_key_generation: # maps to 'Default Team' on UI
allowed_user_roles: ["proxy_admin"]
```

View file

@ -1,4 +1,4 @@
# Team-based Routing
# 👥 Team-based Routing
## Routing
Route calls to different model groups based on the team-id

View file

@ -811,78 +811,6 @@ litellm_settings:
team_id: "core-infra"
```
### Restricting Key Generation
Use this to control who can generate keys. Useful when letting others create keys on the UI.
```yaml
litellm_settings:
key_generation_settings:
team_key_generation:
allowed_team_member_roles: ["admin"]
required_params: ["tags"] # require team admins to set tags for cost-tracking when generating a team key
personal_key_generation: # maps to 'Default Team' on UI
allowed_user_roles: ["proxy_admin"]
```
#### Spec
```python
class TeamUIKeyGenerationConfig(TypedDict):
allowed_team_member_roles: List[str]
required_params: List[str] # require params on `/key/generate` to be set if a team key (team_id in request) is being generated
class PersonalUIKeyGenerationConfig(TypedDict):
allowed_user_roles: List[LitellmUserRoles]
required_params: List[str] # require params on `/key/generate` to be set if a personal key (no team_id in request) is being generated
class StandardKeyGenerationConfig(TypedDict, total=False):
team_key_generation: TeamUIKeyGenerationConfig
personal_key_generation: PersonalUIKeyGenerationConfig
class LitellmUserRoles(str, enum.Enum):
"""
Admin Roles:
PROXY_ADMIN: admin over the platform
PROXY_ADMIN_VIEW_ONLY: can login, view all own keys, view all spend
ORG_ADMIN: admin over a specific organization, can create teams, users only within their organization
Internal User Roles:
INTERNAL_USER: can login, view/create/delete their own keys, view their spend
INTERNAL_USER_VIEW_ONLY: can login, view their own keys, view their own spend
Team Roles:
TEAM: used for JWT auth
Customer Roles:
CUSTOMER: External users -> these are customers
"""
# Admin Roles
PROXY_ADMIN = "proxy_admin"
PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"
# Organization admins
ORG_ADMIN = "org_admin"
# Internal User Roles
INTERNAL_USER = "internal_user"
INTERNAL_USER_VIEW_ONLY = "internal_user_viewer"
# Team Roles
TEAM = "team"
# Customer Roles - External users of proxy
CUSTOMER = "customer"
```
## **Next Steps - Set Budgets, Rate Limits per Virtual Key**
[Follow this doc to set budgets, rate limiters per virtual key with LiteLLM](users)

View file

@ -1,24 +0,0 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Router Architecture (Fallbacks / Retries)
## High Level architecture
<Image img={require('../img/router_architecture.png')} style={{ width: '100%', maxWidth: '4000px' }} />
### Request Flow
1. **User Sends Request**: The process begins when a user sends a request to the LiteLLM Router endpoint. All unified endpoints (`.completion`, `.embeddings`, etc) are supported by LiteLLM Router.
2. **function_with_fallbacks**: The initial request is sent to the `function_with_fallbacks` function. This function wraps the initial request in a try-except block, to handle any exceptions - doing fallbacks if needed. This request is then sent to the `function_with_retries` function.
3. **function_with_retries**: The `function_with_retries` function wraps the request in a try-except block and passes the initial request to a base litellm unified function (`litellm.completion`, `litellm.embeddings`, etc) to handle LLM API calling. `function_with_retries` handles any exceptions - doing retries on the model group if needed (i.e. if the request fails, it will retry on an available model within the model group).
4. **litellm.completion**: The `litellm.completion` function is a base function that handles the LLM API calling. It is used by `function_with_retries` to make the actual request to the LLM API.
## Legend
**model_group**: A group of LLM API deployments that share the same `model_name`, are part of the same `model_group`, and can be load balanced across.

View file

@ -1891,22 +1891,3 @@ router = Router(
debug_level="DEBUG" # defaults to INFO
)
```
## Router General Settings
### Usage
```python
router = Router(model_list=..., router_general_settings=RouterGeneralSettings(async_only_mode=True))
```
### Spec
```python
class RouterGeneralSettings(BaseModel):
async_only_mode: bool = Field(
default=False
) # this will only initialize async clients. Good for memory utils
pass_through_all_models: bool = Field(
default=False
) # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding
```

View file

@ -1,174 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Text Completion
### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">
```python
from litellm import text_completion
response = text_completion(
model="gpt-3.5-turbo-instruct",
prompt="Say this is a test",
max_tokens=7
)
```
</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">
1. Define models on config.yaml
```yaml
model_list:
- model_name: gpt-3.5-turbo-instruct
litellm_params:
model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
api_key: os.environ/OPENAI_API_KEY
- model_name: text-davinci-003
litellm_params:
model: text-completion-openai/text-davinci-003
api_key: os.environ/OPENAI_API_KEY
```
2. Start litellm proxy server
```
litellm --config config.yaml
```
<Tabs>
<TabItem value="python" label="OpenAI Python SDK">
```python
from openai import OpenAI
# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")
response = client.completions.create(
model="gpt-3.5-turbo-instruct",
prompt="Say this is a test",
max_tokens=7
)
print(response)
```
</TabItem>
<TabItem value="curl" label="Curl Request">
```shell
curl --location 'http://0.0.0.0:4000/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{
"model": "gpt-3.5-turbo-instruct",
"prompt": "Say this is a test",
"max_tokens": 7
}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Input Params
LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.
### Required Fields
- `model`: *string* - ID of the model to use
- `prompt`: *string or array* - The prompt(s) to generate completions for
### Optional Fields
- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
- `echo`: *boolean* - Echo back the prompt in addition to the completion.
- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency.
- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
- `logprobs`: *integer* - Include the log probabilities on the logprobs most likely tokens. Max value of 5
- `max_tokens`: *integer* - The maximum number of tokens to generate.
- `n`: *integer* - How many completions to generate for each prompt.
- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
- `seed`: *integer* - If specified, the system will make a best effort to sample deterministically
- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
- `suffix`: *string* - The suffix that comes after a completion of inserted text
- `temperature`: *number* - What sampling temperature to use, between 0 and 2.
- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling.
- `user`: *string* - A unique identifier representing your end-user
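For example, a `text_completion` call that combines several of these optional params (a sketch; the values are illustrative):
```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7,        # cap the completion length
    temperature=0.2,     # lower temperature for more deterministic output
    stop=["\n"],         # stop generating at the first newline
    n=1,                 # number of completions per prompt
    user="my-end-user-id",
)
print(response)
```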
## Output Format
Here's the exact JSON output format you can expect from completion calls:
[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)
<Tabs>
<TabItem value="non-streaming" label="Non-Streaming Response">
```python
{
"id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
"object": "text_completion",
"created": 1589478378,
"model": "gpt-3.5-turbo-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [
{
"text": "\n\nThis is indeed a test",
"index": 0,
"logprobs": null,
"finish_reason": "length"
}
],
"usage": {
"prompt_tokens": 5,
"completion_tokens": 7,
"total_tokens": 12
}
}
```
</TabItem>
<TabItem value="streaming" label="Streaming Response">
```python
{
"id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
"object": "text_completion",
"created": 1690759702,
"choices": [
{
"text": "This",
"index": 0,
"logprobs": null,
"finish_reason": null
}
],
"model": "gpt-3.5-turbo-instruct"
"system_fingerprint": "fp_44709d6fcb",
}
```
</TabItem>
</Tabs>
## **Supported Providers**
| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](../docs/providers/text_completion_openai) |
| Azure OpenAI| [Usage](../docs/providers/azure) |

View file

@ -1,140 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Provider-Specific Wildcard Routing
**Proxy all models from a provider**
Use this if you want to **proxy all models from a specific provider without defining them individually in the config.yaml**
## Step 1. Define provider specific routing
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(
model_list=[
{
"model_name": "anthropic/*",
"litellm_params": {
"model": "anthropic/*",
"api_key": os.environ["ANTHROPIC_API_KEY"]
}
},
{
"model_name": "groq/*",
"litellm_params": {
"model": "groq/*",
"api_key": os.environ["GROQ_API_KEY"]
}
},
{
"model_name": "fo::*:static::*", # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
"litellm_params": {
"model": "openai/fo::*:static::*",
"api_key": os.environ["OPENAI_API_KEY"]
}
}
]
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Step 1** - define provider specific routing on config.yaml
```yaml
model_list:
# provider specific wildcard routing
- model_name: "anthropic/*"
litellm_params:
model: "anthropic/*"
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: "groq/*"
litellm_params:
model: "groq/*"
api_key: os.environ/GROQ_API_KEY
- model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
litellm_params:
model: "openai/fo::*:static::*"
api_key: os.environ/OPENAI_API_KEY
```
</TabItem>
</Tabs>
## [PROXY-Only] Step 2 - Run litellm proxy
```shell
$ litellm --config /path/to/config.yaml
```
## Step 3 - Test it
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import Router
router = Router(model_list=...)
# Test with `anthropic/` - all models with the `anthropic/` prefix get routed to `anthropic/*`
resp = router.completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hello, Claude!"}])
print(resp)
# Test with `groq/` - all models with the `groq/` prefix get routed to `groq/*`
resp = router.completion(model="groq/llama3-8b-8192", messages=[{"role": "user", "content": "Hello, Groq!"}])
print(resp)
# Test with `fo::*:static::*` - all requests matching this pattern get routed to `openai/fo::*:static::*`
resp = router.completion(model="fo::hi::static::hi", messages=[{"role": "user", "content": "Hello!"}])
print(resp)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
```bash
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "anthropic/claude-3-sonnet-20240229",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
```shell
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "groq/llama3-8b-8192",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
Test with `fo::*:static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
```shell
curl http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "fo::hi::static::hi",
"messages": [
{"role": "user", "content": "Hello, Claude!"}
]
}'
```
</TabItem>
</Tabs>

Binary file not shown (image, 59 KiB before).

View file

@ -29,17 +29,13 @@ const sidebars = {
},
items: [
"proxy/docker_quick_start",
{
"type": "category",
"label": "Config.yaml",
"items": ["proxy/configs", "proxy/config_management", "proxy/config_settings"]
},
{
type: "category",
label: "Setup & Deployment",
items: [
"proxy/deploy",
"proxy/prod",
"proxy/configs",
"proxy/cli",
"proxy/model_management",
"proxy/health",
@ -51,7 +47,7 @@ const sidebars = {
{
type: "category",
label: "Architecture",
items: ["proxy/architecture", "proxy/db_info", "router_architecture"],
items: ["proxy/architecture", "proxy/db_info"],
},
{
type: "link",
@ -100,10 +96,11 @@ const sidebars = {
label: "Spend Tracking + Budgets",
items: ["proxy/cost_tracking", "proxy/users", "proxy/custom_pricing", "proxy/team_budgets", "proxy/billing", "proxy/customers"],
},
"proxy/reliability",
{
type: "link",
label: "Load Balancing, Routing, Fallbacks",
href: "https://docs.litellm.ai/docs/routing-load-balancing",
type: "category",
label: "Routing",
items: ["proxy/load_balancing", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing",],
},
{
type: "category",
@ -205,18 +202,26 @@ const sidebars = {
{
type: "category",
label: "Guides",
link: {
type: "generated-index",
title: "Chat Completions",
description: "Details on the completion() function",
slug: "/completion",
},
items: [
"exception_mapping",
"completion/input",
"completion/provider_specific_params",
"guides/finetuned_models",
"completion/audio",
"completion/vision",
"completion/json_mode",
"completion/prompt_caching",
"completion/audio",
"completion/vision",
"completion/predict_outputs",
"completion/prefix",
"completion/drop_params",
"completion/prompt_formatting",
"completion/output",
"completion/usage",
"exception_mapping",
"completion/stream",
"completion/message_trimming",
"completion/function_call",
@ -224,45 +229,21 @@ const sidebars = {
"completion/batching",
"completion/mock_requests",
"completion/reliable_completions",
]
],
},
{
type: "category",
label: "Supported Endpoints",
items: [
{
type: "category",
label: "Chat",
link: {
type: "generated-index",
title: "Chat Completions",
description: "Details on the completion() function",
slug: "/completion",
},
items: [
"completion/input",
"completion/output",
"completion/usage",
],
},
"text_completion",
"embedding/supported_embedding",
"image_generation",
{
type: "category",
label: "Audio",
"items": [
"audio_transcription",
"text_to_speech",
]
},
"audio_transcription",
"text_to_speech",
"rerank",
"assistants",
"batches",
"realtime",
"fine_tuning",
"moderation",
{
type: "link",
label: "Use LiteLLM Proxy with Vertex, Bedrock SDK",
@ -272,14 +253,8 @@ const sidebars = {
},
{
type: "category",
label: "Routing, Loadbalancing & Fallbacks",
link: {
type: "generated-index",
title: "Routing, Loadbalancing & Fallbacks",
description: "Learn how to load balance, route, and set fallbacks for your LLM requests",
slug: "/routing-load-balancing",
},
items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing", "wildcard_routing"],
label: "Load Balancing",
items: ["routing", "scheduler"],
},
{
type: "category",

View file

@ -2,9 +2,7 @@
from typing import Optional, List
from litellm._logging import verbose_logger
from litellm.proxy.proxy_server import PrismaClient, HTTPException
from litellm.llms.custom_httpx.http_handler import HTTPHandler
import collections
import httpx
from datetime import datetime
@ -116,6 +114,7 @@ async def ui_get_spend_by_tags(
def _forecast_daily_cost(data: list):
import requests # type: ignore
from datetime import datetime, timedelta
if len(data) == 0:
@ -137,17 +136,17 @@ def _forecast_daily_cost(data: list):
print("last entry date", last_entry_date)
# Assuming today_date is a datetime object
today_date = datetime.now()
# Calculate the last day of the month
last_day_of_todays_month = datetime(
today_date.year, today_date.month % 12 + 1, 1
) - timedelta(days=1)
print("last day of todays month", last_day_of_todays_month)
# Calculate the remaining days in the month
remaining_days = (last_day_of_todays_month - last_entry_date).days
print("remaining days", remaining_days)
current_spend_this_month = 0
series = {}
for entry in data:
@ -177,19 +176,13 @@ def _forecast_daily_cost(data: list):
"Content-Type": "application/json",
}
client = HTTPHandler()
try:
response = client.post(
url="https://trend-api-production.up.railway.app/forecast",
json=payload,
headers=headers,
)
except httpx.HTTPStatusError as e:
raise HTTPException(
status_code=500,
detail={"error": f"Error getting forecast: {e.response.text}"},
)
response = requests.post(
url="https://trend-api-production.up.railway.app/forecast",
json=payload,
headers=headers,
)
# check the status code
response.raise_for_status()
json_response = response.json()
forecast_data = json_response["forecast"]
@ -213,3 +206,13 @@ def _forecast_daily_cost(data: list):
f"Predicted Spend for { today_month } 2024, ${total_predicted_spend}"
)
return {"response": response_data, "predicted_spend": predicted_spend}
# print(f"Date: {entry['date']}, Spend: {entry['spend']}, Response: {response.text}")
# _forecast_daily_cost(
# [
# {"date": "2022-01-01", "spend": 100},
# ]
# )

View file

@ -24,7 +24,6 @@ from litellm.proxy._types import (
KeyManagementSettings,
LiteLLM_UpperboundKeyGenerateParams,
)
from litellm.types.utils import StandardKeyGenerationConfig
import httpx
import dotenv
from enum import Enum
@ -68,7 +67,6 @@ callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] =
langfuse_default_tags: Optional[List[str]] = None
langsmith_batch_size: Optional[int] = None
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Callable] = (
[]
@ -135,7 +133,7 @@ use_client: bool = False
ssl_verify: Union[str, bool] = True
ssl_certificate: Optional[str] = None
disable_streaming_logging: bool = False
in_memory_llm_clients_cache: InMemoryCache = InMemoryCache()
in_memory_llm_clients_cache: dict = {}
safe_memory_mode: bool = False
enable_azure_ad_token_refresh: Optional[bool] = False
### DEFAULT AZURE API VERSION ###
@ -275,7 +273,6 @@ s3_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
key_generation_settings: Optional[StandardKeyGenerationConfig] = None
default_internal_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
@ -283,7 +280,6 @@ default_max_internal_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
internal_user_budget_duration: Optional[str] = None
max_end_user_budget: Optional[float] = None
disable_end_user_cost_tracking: Optional[bool] = None
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
#### RELIABILITY ####

View file

@ -313,13 +313,12 @@ def get_redis_async_client(**env_overrides) -> async_redis.Redis:
def get_redis_connection_pool(**env_overrides):
redis_kwargs = _get_redis_client_logic(**env_overrides)
verbose_logger.debug("get_redis_connection_pool: redis_kwargs", redis_kwargs)
if "url" in redis_kwargs and redis_kwargs["url"] is not None:
return async_redis.BlockingConnectionPool.from_url(
timeout=5, url=redis_kwargs["url"]
)
connection_class = async_redis.Connection
if "ssl" in redis_kwargs:
if "ssl" in redis_kwargs and redis_kwargs["ssl"] is not None:
connection_class = async_redis.SSLConnection
redis_kwargs.pop("ssl", None)
redis_kwargs["connection_class"] = connection_class

View file

@ -20,7 +20,6 @@ from typing import TYPE_CHECKING, Any, List, Optional, Tuple
import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
from litellm.types.caching import RedisPipelineIncrementOperation
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import all_litellm_params
@ -891,92 +890,3 @@ class RedisCache(BaseCache):
def delete_cache(self, key):
self.redis_client.delete(key)
async def _pipeline_increment_helper(
self,
pipe: pipeline,
increment_list: List[RedisPipelineIncrementOperation],
) -> Optional[List[float]]:
"""Helper function for pipeline increment operations"""
# Iterate through each increment operation and add commands to pipeline
for increment_op in increment_list:
cache_key = self.check_and_fix_namespace(key=increment_op["key"])
print_verbose(
f"Increment ASYNC Redis Cache PIPELINE: key: {cache_key}\nValue {increment_op['increment_value']}\nttl={increment_op['ttl']}"
)
pipe.incrbyfloat(cache_key, increment_op["increment_value"])
if increment_op["ttl"] is not None:
_td = timedelta(seconds=increment_op["ttl"])
pipe.expire(cache_key, _td)
# Execute the pipeline and return results
results = await pipe.execute()
print_verbose(f"Increment ASYNC Redis Cache PIPELINE: results: {results}")
return results
async def async_increment_pipeline(
self, increment_list: List[RedisPipelineIncrementOperation], **kwargs
) -> Optional[List[float]]:
"""
Use Redis Pipelines for bulk increment operations
Args:
increment_list: List of RedisPipelineIncrementOperation dicts containing:
- key: str
- increment_value: float
- ttl_seconds: int
"""
# don't waste a network request if there's nothing to increment
if len(increment_list) == 0:
return None
from redis.asyncio import Redis
_redis_client: Redis = self.init_async_client() # type: ignore
start_time = time.time()
print_verbose(
f"Increment Async Redis Cache Pipeline: increment list: {increment_list}"
)
try:
async with _redis_client as redis_client:
async with redis_client.pipeline(transaction=True) as pipe:
results = await self._pipeline_increment_helper(
pipe, increment_list
)
print_verbose(f"pipeline increment results: {results}")
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_success_hook(
service=ServiceTypes.REDIS,
duration=_duration,
call_type="async_increment_pipeline",
start_time=start_time,
end_time=end_time,
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
)
)
return results
except Exception as e:
## LOGGING ##
end_time = time.time()
_duration = end_time - start_time
asyncio.create_task(
self.service_logger_obj.async_service_failure_hook(
service=ServiceTypes.REDIS,
duration=_duration,
error=e,
call_type="async_increment_pipeline",
start_time=start_time,
end_time=end_time,
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
)
)
verbose_logger.error(
"LiteLLM Redis Caching: async increment_pipeline() - Got exception from REDIS %s",
str(e),
)
raise e

View file

@ -32,11 +32,9 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload
from .types import DD_ERRORS, DatadogPayload, DataDogStatus
from .utils import make_json_serializable
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
@ -108,20 +106,20 @@ class DataDogLogger(CustomBatchLogger):
verbose_logger.debug(
"Datadog: Logging - Enters logging function for model %s", kwargs
)
await self._log_async_event(kwargs, response_obj, start_time, end_time)
except Exception as e:
verbose_logger.exception(
f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
)
pass
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
self.log_queue.append(dd_payload)
verbose_logger.debug(
"Datadog: Logging - Enters logging function for model %s", kwargs
f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..."
)
await self._log_async_event(kwargs, response_obj, start_time, end_time)
if len(self.log_queue) >= self.batch_size:
await self.async_send_batch()
except Exception as e:
verbose_logger.exception(
@ -183,20 +181,12 @@ class DataDogLogger(CustomBatchLogger):
verbose_logger.debug(
"Datadog: Logging - Enters logging function for model %s", kwargs
)
if litellm.datadog_use_v1 is True:
dd_payload = self._create_v0_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
)
else:
dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
)
dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
)
response = self.sync_client.post(
url=self.intake_url,
@ -225,22 +215,6 @@ class DataDogLogger(CustomBatchLogger):
pass
pass
async def _log_async_event(self, kwargs, response_obj, start_time, end_time):
dd_payload = self.create_datadog_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
)
self.log_queue.append(dd_payload)
verbose_logger.debug(
f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..."
)
if len(self.log_queue) >= self.batch_size:
await self.async_send_batch()
def create_datadog_logging_payload(
self,
kwargs: Union[dict, Any],
@ -262,29 +236,73 @@ class DataDogLogger(CustomBatchLogger):
"""
import json
standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
"standard_logging_object", None
)
if standard_logging_object is None:
raise ValueError("standard_logging_object not found in kwargs")
litellm_params = kwargs.get("litellm_params", {})
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
messages = kwargs.get("messages")
optional_params = kwargs.get("optional_params", {})
call_type = kwargs.get("call_type", "litellm.completion")
cache_hit = kwargs.get("cache_hit", False)
usage = response_obj["usage"]
id = response_obj.get("id", str(uuid.uuid4()))
usage = dict(usage)
try:
response_time = (end_time - start_time).total_seconds() * 1000
except Exception:
response_time = None
status = DataDogStatus.INFO
if standard_logging_object.get("status") == "failure":
status = DataDogStatus.ERROR
try:
response_obj = dict(response_obj)
except Exception:
response_obj = response_obj
# Clean Metadata before logging - never log raw metadata
# the raw metadata can contain circular references which leads to infinite recursion
# we clean out all extra litellm metadata params before logging
clean_metadata = {}
if isinstance(metadata, dict):
for key, value in metadata.items():
# clean litellm metadata before logging
if key in [
"endpoint",
"caching_groups",
"previous_models",
]:
continue
else:
clean_metadata[key] = value
# Build the initial payload
make_json_serializable(standard_logging_object)
json_payload = json.dumps(standard_logging_object)
payload = {
"id": id,
"call_type": call_type,
"cache_hit": cache_hit,
"start_time": start_time,
"end_time": end_time,
"response_time": response_time,
"model": kwargs.get("model", ""),
"user": kwargs.get("user", ""),
"model_parameters": optional_params,
"spend": kwargs.get("response_cost", 0),
"messages": messages,
"response": response_obj,
"usage": usage,
"metadata": clean_metadata,
}
make_json_serializable(payload)
json_payload = json.dumps(payload)
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
dd_payload = DatadogPayload(
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(),
hostname=self._get_datadog_hostname(),
ddsource=os.getenv("DD_SOURCE", "litellm"),
ddtags="",
hostname="",
message=json_payload,
service=self._get_datadog_service(),
status=status,
service="litellm-server",
status=DataDogStatus.INFO,
)
return dd_payload
@ -364,140 +382,3 @@ class DataDogLogger(CustomBatchLogger):
No user has asked for this so far; this might be spammy on Datadog. If the need arises, we can implement this.
"""
return
async def async_post_call_failure_hook(
self,
request_data: dict,
original_exception: Exception,
user_api_key_dict: UserAPIKeyAuth,
):
"""
Handles Proxy Errors (not-related to LLM API), ex: Authentication Errors
"""
import json
_exception_payload = DatadogProxyFailureHookJsonMessage(
exception=str(original_exception),
error_class=str(original_exception.__class__.__name__),
status_code=getattr(original_exception, "status_code", None),
traceback=traceback.format_exc(),
user_api_key_dict=user_api_key_dict.model_dump(),
)
json_payload = json.dumps(_exception_payload)
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
dd_payload = DatadogPayload(
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(),
hostname=self._get_datadog_hostname(),
message=json_payload,
service=self._get_datadog_service(),
status=DataDogStatus.ERROR,
)
self.log_queue.append(dd_payload)
def _create_v0_logging_payload(
self,
kwargs: Union[dict, Any],
response_obj: Any,
start_time: datetime.datetime,
end_time: datetime.datetime,
) -> DatadogPayload:
"""
Note: This is our V1 Version of DataDog Logging Payload
(Not Recommended) If you want this to get logged set `litellm.datadog_use_v1 = True`
"""
import json
litellm_params = kwargs.get("litellm_params", {})
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
messages = kwargs.get("messages")
optional_params = kwargs.get("optional_params", {})
call_type = kwargs.get("call_type", "litellm.completion")
cache_hit = kwargs.get("cache_hit", False)
usage = response_obj["usage"]
id = response_obj.get("id", str(uuid.uuid4()))
usage = dict(usage)
try:
response_time = (end_time - start_time).total_seconds() * 1000
except Exception:
response_time = None
try:
response_obj = dict(response_obj)
except Exception:
response_obj = response_obj
# Clean Metadata before logging - never log raw metadata
# the raw metadata can contain circular references which leads to infinite recursion
# we clean out all extra litellm metadata params before logging
clean_metadata = {}
if isinstance(metadata, dict):
for key, value in metadata.items():
# clean litellm metadata before logging
if key in [
"endpoint",
"caching_groups",
"previous_models",
]:
continue
else:
clean_metadata[key] = value
# Build the initial payload
payload = {
"id": id,
"call_type": call_type,
"cache_hit": cache_hit,
"start_time": start_time,
"end_time": end_time,
"response_time": response_time,
"model": kwargs.get("model", ""),
"user": kwargs.get("user", ""),
"model_parameters": optional_params,
"spend": kwargs.get("response_cost", 0),
"messages": messages,
"response": response_obj,
"usage": usage,
"metadata": clean_metadata,
}
make_json_serializable(payload)
json_payload = json.dumps(payload)
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
dd_payload = DatadogPayload(
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(),
hostname=self._get_datadog_hostname(),
message=json_payload,
service=self._get_datadog_service(),
status=DataDogStatus.INFO,
)
return dd_payload
@staticmethod
def _get_datadog_tags():
return f"env:{os.getenv('DD_ENV', 'unknown')},service:{os.getenv('DD_SERVICE', 'litellm')},version:{os.getenv('DD_VERSION', 'unknown')}"
@staticmethod
def _get_datadog_source():
return os.getenv("DD_SOURCE", "litellm")
@staticmethod
def _get_datadog_service():
return os.getenv("DD_SERVICE", "litellm-server")
@staticmethod
def _get_datadog_hostname():
return ""
@staticmethod
def _get_datadog_env():
return os.getenv("DD_ENV", "unknown")

View file

@ -1,5 +1,5 @@
from enum import Enum
from typing import Optional, TypedDict
from typing import TypedDict
class DataDogStatus(str, Enum):
@ -19,11 +19,3 @@ class DatadogPayload(TypedDict, total=False):
class DD_ERRORS(Enum):
DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
exception: str
error_class: str
status_code: Optional[int]
traceback: str
user_api_key_dict: dict

View file

@ -18,7 +18,6 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.prometheus import *
from litellm.types.utils import StandardLoggingPayload
from litellm.utils import get_end_user_id_for_cost_tracking
class PrometheusLogger(CustomLogger):
@ -365,7 +364,8 @@ class PrometheusLogger(CustomLogger):
model = kwargs.get("model", "")
litellm_params = kwargs.get("litellm_params", {}) or {}
_metadata = litellm_params.get("metadata", {})
end_user_id = get_end_user_id_for_cost_tracking(litellm_params)
proxy_server_request = litellm_params.get("proxy_server_request") or {}
end_user_id = proxy_server_request.get("body", {}).get("user", None)
user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
@ -664,11 +664,13 @@ class PrometheusLogger(CustomLogger):
# unpack kwargs
model = kwargs.get("model", "")
litellm_params = kwargs.get("litellm_params", {}) or {}
standard_logging_payload: StandardLoggingPayload = kwargs.get(
"standard_logging_object", {}
)
litellm_params = kwargs.get("litellm_params", {}) or {}
end_user_id = get_end_user_id_for_cost_tracking(litellm_params)
proxy_server_request = litellm_params.get("proxy_server_request") or {}
end_user_id = proxy_server_request.get("body", {}).get("user", None)
user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]

View file

@ -8,5 +8,4 @@ Core files:
- `exception_mapping_utils.py`: utils for mapping exceptions to openai-compatible error types.
- `default_encoding.py`: code for loading the default encoding (tiktoken)
- `get_llm_provider_logic.py`: code for inferring the LLM provider from a given model name.
- `duration_parser.py`: code for parsing durations - e.g. "1d", "1mo", "10s"

View file

@ -1,92 +0,0 @@
"""
Helper utilities for parsing durations - 1s, 1d, 10d, 30d, 1mo, 2mo
duration_in_seconds is used in different parts of the code base, for example:
- Router - Provider budget routing
- Proxy - Key, Team Generation
"""
import re
import time
from datetime import datetime, timedelta
from typing import Tuple
def _extract_from_regex(duration: str) -> Tuple[int, str]:
match = re.match(r"(\d+)(mo|[smhd]?)", duration)
if not match:
raise ValueError("Invalid duration format")
value, unit = match.groups()
value = int(value)
return value, unit
def get_last_day_of_month(year, month):
# Handle December case
if month == 12:
return 31
# The first day of the next month, minus one day, gives the last day of this month
next_month = datetime(year=year, month=month + 1, day=1)
last_day_of_month = (next_month - timedelta(days=1)).day
return last_day_of_month
def duration_in_seconds(duration: str) -> int:
"""
Parameters:
- duration:
- "<number>s" - seconds
- "<number>m" - minutes
- "<number>h" - hours
- "<number>d" - days
- "<number>mo" - months
Returns the time in seconds until the budget needs to be reset
"""
value, unit = _extract_from_regex(duration=duration)
if unit == "s":
return value
elif unit == "m":
return value * 60
elif unit == "h":
return value * 3600
elif unit == "d":
return value * 86400
elif unit == "mo":
now = time.time()
current_time = datetime.fromtimestamp(now)
if current_time.month == 12:
target_year = current_time.year + 1
target_month = 1
else:
target_year = current_time.year
target_month = current_time.month + value
# Determine the day to set for next month
target_day = current_time.day
last_day_of_target_month = get_last_day_of_month(target_year, target_month)
if target_day > last_day_of_target_month:
target_day = last_day_of_target_month
next_month = datetime(
year=target_year,
month=target_month,
day=target_day,
hour=current_time.hour,
minute=current_time.minute,
second=current_time.second,
microsecond=current_time.microsecond,
)
# Calculate the duration until the first day of the next month
duration_until_next_month = next_month - current_time
return int(duration_until_next_month.total_seconds())
else:
raise ValueError(f"Unsupported duration unit, passed duration: {duration}")

View file

@ -934,10 +934,19 @@ class Logging:
status="success",
)
)
callbacks = get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks,
global_callbacks=litellm.success_callback,
)
if self.dynamic_success_callbacks is not None and isinstance(
self.dynamic_success_callbacks, list
):
callbacks = self.dynamic_success_callbacks
## keep the internal functions ##
for callback in litellm.success_callback:
if (
isinstance(callback, CustomLogger)
and "_PROXY_" in callback.__class__.__name__
):
callbacks.append(callback)
else:
callbacks = litellm.success_callback
## REDACT MESSAGES ##
result = redact_message_input_output_from_logging(
@ -1359,11 +1368,8 @@ class Logging:
and customLogger is not None
): # custom logger functions
print_verbose(
"success callbacks: Running Custom Callback Function - {}".format(
callback
)
"success callbacks: Running Custom Callback Function"
)
customLogger.log_event(
kwargs=self.model_call_details,
response_obj=result,
@ -1460,10 +1466,21 @@ class Logging:
status="success",
)
)
callbacks = get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks,
global_callbacks=litellm._async_success_callback,
)
if self.dynamic_async_success_callbacks is not None and isinstance(
self.dynamic_async_success_callbacks, list
):
callbacks = self.dynamic_async_success_callbacks
## keep the internal functions ##
for callback in litellm._async_success_callback:
callback_name = ""
if isinstance(callback, CustomLogger):
callback_name = callback.__class__.__name__
if callable(callback):
callback_name = callback.__name__
if "_PROXY_" in callback_name:
callbacks.append(callback)
else:
callbacks = litellm._async_success_callback
result = redact_message_input_output_from_logging(
model_call_details=(
@ -1730,10 +1747,21 @@ class Logging:
start_time=start_time,
end_time=end_time,
)
callbacks = get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_failure_callbacks,
global_callbacks=litellm.failure_callback,
)
callbacks = [] # init this to empty incase it's not created
if self.dynamic_failure_callbacks is not None and isinstance(
self.dynamic_failure_callbacks, list
):
callbacks = self.dynamic_failure_callbacks
## keep the internal functions ##
for callback in litellm.failure_callback:
if (
isinstance(callback, CustomLogger)
and "_PROXY_" in callback.__class__.__name__
):
callbacks.append(callback)
else:
callbacks = litellm.failure_callback
result = None # result sent to all loggers, init this to None incase it's not created
@ -1916,10 +1944,21 @@ class Logging:
end_time=end_time,
)
callbacks = get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_failure_callbacks,
global_callbacks=litellm._async_failure_callback,
)
callbacks = [] # init this to empty incase it's not created
if self.dynamic_async_failure_callbacks is not None and isinstance(
self.dynamic_async_failure_callbacks, list
):
callbacks = self.dynamic_async_failure_callbacks
## keep the internal functions ##
for callback in litellm._async_failure_callback:
if (
isinstance(callback, CustomLogger)
and "_PROXY_" in callback.__class__.__name__
):
callbacks.append(callback)
else:
callbacks = litellm._async_failure_callback
result = None # result sent to all loggers, init this to None incase it's not created
for callback in callbacks:
@ -2320,7 +2359,6 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
_in_memory_loggers.append(_mlflow_logger)
return _mlflow_logger # type: ignore
def get_custom_logger_compatible_class(
logging_integration: litellm._custom_logger_compatible_callbacks_literal,
) -> Optional[CustomLogger]:
@ -2911,11 +2949,3 @@ def modify_integration(integration_name, integration_params):
if integration_name == "supabase":
if "table_name" in integration_params:
Supabase.supabase_table_name = integration_params["table_name"]
def get_combined_callback_list(
dynamic_success_callbacks: Optional[List], global_callbacks: List
) -> List:
if dynamic_success_callbacks is None:
return global_callbacks
return list(set(dynamic_success_callbacks + global_callbacks))

View file

@ -1793,7 +1793,7 @@ class CustomStreamWrapper:
or self.custom_llm_provider == "bedrock"
or self.custom_llm_provider == "triton"
or self.custom_llm_provider == "watsonx"
or self.custom_llm_provider in litellm.openai_compatible_providers
or self.custom_llm_provider in litellm.openai_compatible_endpoints
or self.custom_llm_provider in litellm._custom_providers
):
async for chunk in self.completion_stream:

View file

@ -12,11 +12,7 @@ from typing_extensions import overload
import litellm
from litellm.caching.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import EmbeddingResponse
from litellm.utils import (
CustomStreamWrapper,
@ -981,10 +977,7 @@ class AzureChatCompletion(BaseLLM):
else:
_params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.AZURE,
params=_params,
)
async_handler = AsyncHTTPHandler(**_params) # type: ignore
else:
async_handler = client # type: ignore
@ -1528,8 +1521,7 @@ class AzureChatCompletion(BaseLLM):
prompt: Optional[str] = None,
) -> dict:
client_session = (
litellm.aclient_session
or get_async_httpx_client(llm_provider=litellm.LlmProviders.AZURE).client
litellm.aclient_session or httpx.AsyncClient()
) # handle dall-e-2 calls
if "gateway.ai.cloudflare.com" in api_base:

View file

@ -17,6 +17,22 @@ from litellm.utils import CustomStreamWrapper
class OpenAIO1ChatCompletion(OpenAIChatCompletion):
async def mock_async_streaming(
self,
response: Any,
model: Optional[str],
logging_obj: Any,
):
model_response = await response
completion_stream = MockResponseIterator(model_response=model_response)
streaming_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
)
return streaming_response
def completion(
self,
model_response: ModelResponse,
@ -38,7 +54,7 @@ class OpenAIO1ChatCompletion(OpenAIChatCompletion):
custom_llm_provider: Optional[str] = None,
drop_params: Optional[bool] = None,
):
# stream: Optional[bool] = optional_params.pop("stream", False)
stream: Optional[bool] = optional_params.pop("stream", False)
response = super().completion(
model_response,
timeout,
@ -60,4 +76,20 @@ class OpenAIO1ChatCompletion(OpenAIChatCompletion):
drop_params,
)
return response
if stream is True:
if asyncio.iscoroutine(response):
return self.mock_async_streaming(
response=response, model=model, logging_obj=logging_obj # type: ignore
)
completion_stream = MockResponseIterator(model_response=response)
streaming_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
)
return streaming_response
else:
return response

View file

@ -18,7 +18,6 @@ import litellm
from litellm import LlmProviders
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import ProviderField
from litellm.utils import (
@ -563,9 +562,8 @@ class OpenAIChatCompletion(BaseLLM):
_cache_key = f"hashed_api_key={hashed_api_key},api_base={api_base},timeout={timeout},max_retries={max_retries},organization={organization},is_async={is_async}"
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key)
if _cached_client:
return _cached_client
if _cache_key in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key]
if is_async:
_new_client: Union[OpenAI, AsyncOpenAI] = AsyncOpenAI(
api_key=api_key,
@ -586,11 +584,7 @@ class OpenAIChatCompletion(BaseLLM):
)
## SAVE CACHE KEY
litellm.in_memory_llm_clients_cache.set_cache(
key=_cache_key,
value=_new_client,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
litellm.in_memory_llm_clients_cache[_cache_key] = _new_client
return _new_client
else:

View file

@ -779,32 +779,3 @@ class ModelResponseIterator:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
"""
Convert a string chunk to a GenericStreamingChunk
Note: This is used for Anthropic pass-through streaming logging
We can move __anext__ and __next__ to use this function since it's common logic.
Did not migrate them to minimize the changes made in one PR.
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)

View file

@ -13,11 +13,7 @@ import httpx
import requests
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ..base import BaseLLM
@ -166,10 +162,7 @@ class AnthropicTextCompletion(BaseLLM):
client=None,
):
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC,
params={"timeout": httpx.Timeout(timeout=600.0, connect=5.0)},
)
client = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await client.post(api_base, headers=headers, data=json.dumps(data))
@ -205,10 +198,7 @@ class AnthropicTextCompletion(BaseLLM):
client=None,
):
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC,
params={"timeout": httpx.Timeout(timeout=600.0, connect=5.0)},
)
client = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await client.post(api_base, headers=headers, data=json.dumps(data))

View file

@ -74,10 +74,7 @@ class AzureAIEmbedding(OpenAIChatCompletion):
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> EmbeddingResponse:
if client is None or not isinstance(client, AsyncHTTPHandler):
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.AZURE_AI,
params={"timeout": timeout},
)
client = AsyncHTTPHandler(timeout=timeout, concurrent_limit=1)
url = "{}/images/embeddings".format(api_base)

View file

@ -4,7 +4,6 @@ import httpx
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.cohere.rerank import CohereRerank
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.rerank import RerankResponse
@ -74,7 +73,6 @@ class AzureAIRerank(CohereRerank):
return_documents: Optional[bool] = True,
max_chunks_per_doc: Optional[int] = None,
_is_async: Optional[bool] = False,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> RerankResponse:
if headers is None:

View file

@ -458,7 +458,7 @@ class AmazonConverseConfig:
"""
Abbreviations of regions AWS Bedrock supports for cross region inference
"""
return ["us", "eu", "apac"]
return ["us", "eu"]
def _get_base_model(self, model: str) -> str:
"""

View file

@ -9,10 +9,7 @@ import httpx
import requests
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
from .prompt_templates.factory import custom_prompt, prompt_factory
@ -188,10 +185,7 @@ async def async_completion(
headers={},
):
async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.CLARIFAI,
params={"timeout": 600.0},
)
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await async_handler.post(
url=model, headers=headers, data=json.dumps(data)
)

View file

@ -11,11 +11,7 @@ import requests # type: ignore
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.bedrock import CohereEmbeddingRequest
from litellm.utils import Choices, Message, ModelResponse, Usage
@ -74,12 +70,8 @@ async def async_embedding(
},
)
## COMPLETION CALL
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.COHERE,
params={"timeout": timeout},
)
client = AsyncHTTPHandler(concurrent_limit=1, timeout=timeout)
try:
response = await client.post(api_base, headers=headers, data=json.dumps(data))
@ -152,11 +144,6 @@ def embedding(
api_key=api_key,
headers=headers,
encoding=encoding,
client=(
client
if client is not None and isinstance(client, AsyncHTTPHandler)
else None
),
)
## LOGGING

View file

@ -6,14 +6,10 @@ LiteLLM supports the rerank API format, no parameter transformation occurs
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base import BaseLLM
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
@ -38,23 +34,6 @@ class CohereRerank(BaseLLM):
# Merge other headers, overriding any default ones except Authorization
return {**default_headers, **headers}
def ensure_rerank_endpoint(self, api_base: str) -> str:
"""
Ensures the `/v1/rerank` endpoint is appended to the given `api_base`.
If `/v1/rerank` is already present, the original URL is returned.
:param api_base: The base API URL.
:return: A URL with `/v1/rerank` appended if missing.
"""
# Parse the base URL to ensure proper structure
url = httpx.URL(api_base)
# Check if the URL already ends with `/v1/rerank`
if not url.path.endswith("/v1/rerank"):
url = url.copy_with(path=f"{url.path.rstrip('/')}/v1/rerank")
return str(url)
def rerank(
self,
model: str,
@ -69,10 +48,9 @@ class CohereRerank(BaseLLM):
return_documents: Optional[bool] = True,
max_chunks_per_doc: Optional[int] = None,
_is_async: Optional[bool] = False, # New parameter
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> RerankResponse:
headers = self.validate_environment(api_key=api_key, headers=headers)
api_base = self.ensure_rerank_endpoint(api_base)
request_data = RerankRequest(
model=model,
query=query,
@ -98,13 +76,9 @@ class CohereRerank(BaseLLM):
if _is_async:
return self.async_rerank(request_data=request_data, api_key=api_key, api_base=api_base, headers=headers) # type: ignore # Call async method
if client is not None and isinstance(client, HTTPHandler):
client = client
else:
client = _get_httpx_client()
client = _get_httpx_client()
response = client.post(
url=api_base,
api_base,
headers=headers,
json=request_data_dict,
)
@ -126,13 +100,10 @@ class CohereRerank(BaseLLM):
api_key: str,
api_base: str,
headers: dict,
client: Optional[AsyncHTTPHandler] = None,
) -> RerankResponse:
request_data_dict = request_data.dict(exclude_none=True)
client = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.COHERE
)
client = get_async_httpx_client(llm_provider=litellm.LlmProviders.COHERE)
response = await client.post(
api_base,

View file

@ -7,8 +7,8 @@ import httpx
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
import litellm
from litellm.caching import InMemoryCache
from litellm.types.llms.custom_http import *
from .types import httpxSpecialProvider
if TYPE_CHECKING:
from litellm import LlmProviders
@ -26,63 +26,6 @@ headers = {
# https://www.python-httpx.org/advanced/timeouts
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
import re
def mask_sensitive_info(error_message):
# Find the start of the key parameter
if isinstance(error_message, str):
key_index = error_message.find("key=")
else:
return error_message
# If key is found
if key_index != -1:
# Find the end of the key parameter (next & or end of string)
next_param = error_message.find("&", key_index)
if next_param == -1:
# If no more parameters, mask until the end of the string
masked_message = error_message[: key_index + 4] + "[REDACTED_API_KEY]"
else:
# Replace the key with redacted value, keeping other parameters
masked_message = (
error_message[: key_index + 4]
+ "[REDACTED_API_KEY]"
+ error_message[next_param:]
)
return masked_message
return error_message
class MaskedHTTPStatusError(httpx.HTTPStatusError):
def __init__(
self, original_error, message: Optional[str] = None, text: Optional[str] = None
):
# Create a new error with the masked URL
masked_url = mask_sensitive_info(str(original_error.request.url))
# Create a new error that looks like the original, but with a masked URL
super().__init__(
message=original_error.message,
request=httpx.Request(
method=original_error.request.method,
url=masked_url,
headers=original_error.request.headers,
content=original_error.request.content,
),
response=httpx.Response(
status_code=original_error.response.status_code,
content=original_error.response.content,
headers=original_error.response.headers,
),
)
self.message = message
self.text = text
class AsyncHTTPHandler:
@ -211,16 +154,13 @@ class AsyncHTTPHandler:
headers=headers,
)
except httpx.HTTPStatusError as e:
setattr(e, "status_code", e.response.status_code)
if stream is True:
setattr(e, "message", await e.response.aread())
setattr(e, "text", await e.response.aread())
else:
setattr(e, "message", mask_sensitive_info(e.response.text))
setattr(e, "text", mask_sensitive_info(e.response.text))
setattr(e, "status_code", e.response.status_code)
setattr(e, "message", e.response.text)
setattr(e, "text", e.response.text)
raise e
except Exception as e:
raise e
@ -458,17 +398,11 @@ class HTTPHandler:
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
if stream is True:
setattr(e, "message", e.response.read())
else:
setattr(e, "message", e.response.text)
raise e
except Exception as e:
raise e
@ -542,9 +476,8 @@ def get_async_httpx_client(
pass
_cache_key_name = "async_httpx_client" + _params_key_name + llm_provider
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
if _cached_client:
return _cached_client
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = AsyncHTTPHandler(**params)
@ -552,11 +485,7 @@ def get_async_httpx_client(
_new_client = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
litellm.in_memory_llm_clients_cache.set_cache(
key=_cache_key_name,
value=_new_client,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client
@ -576,18 +505,13 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
pass
_cache_key_name = "httpx_client" + _params_key_name
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
if _cached_client:
return _cached_client
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
if params is not None:
_new_client = HTTPHandler(**params)
else:
_new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
litellm.in_memory_llm_clients_cache.set_cache(
key=_cache_key_name,
value=_new_client,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
return _new_client

View file

@ -0,0 +1,11 @@
from enum import Enum
import litellm
class httpxSpecialProvider(str, Enum):
LoggingCallback = "logging_callback"
GuardrailCallback = "guardrail_callback"
Caching = "caching"
Oauth2Check = "oauth2_check"
SecretManager = "secret_manager"

View file

@ -393,10 +393,7 @@ class DatabricksChatCompletion(BaseLLM):
if timeout is None:
timeout = httpx.Timeout(timeout=600.0, connect=5.0)
self.async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.DATABRICKS,
params={"timeout": timeout},
)
self.async_handler = AsyncHTTPHandler(timeout=timeout)
try:
response = await self.async_handler.post(
@ -613,10 +610,7 @@ class DatabricksChatCompletion(BaseLLM):
response = None
try:
if client is None or isinstance(client, AsyncHTTPHandler):
self.async_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.DATABRICKS,
params={"timeout": timeout},
)
self.async_client = AsyncHTTPHandler(timeout=timeout) # type: ignore
else:
self.async_client = client

View file

@ -5,14 +5,9 @@ from typing import Any, Coroutine, Literal, Optional, Union
import httpx
from openai.types.fine_tuning.fine_tuning_job import FineTuningJob, Hyperparameters
import litellm
from litellm._logging import verbose_logger
from litellm.llms.base import BaseLLM
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,
)
@ -31,9 +26,8 @@ class VertexFineTuningAPI(VertexLLM):
def __init__(self) -> None:
super().__init__()
self.async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
params={"timeout": 600.0},
self.async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
def convert_response_created_at(self, response: ResponseTuningJob):

View file

@ -6,68 +6,55 @@ from typing import Any, Callable, Optional, Union
from httpx._config import Timeout
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import CustomStreamingDecoder
from litellm.utils import ModelResponse
from ...groq.chat.transformation import GroqChatConfig
from ...openai_like.chat.handler import OpenAILikeChatHandler
from ...OpenAI.openai import OpenAIChatCompletion
class GroqChatCompletion(OpenAILikeChatHandler):
class GroqChatCompletion(OpenAIChatCompletion):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def completion(
self,
*,
model: str,
messages: list,
api_base: str,
custom_llm_provider: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
api_key: Optional[str],
logging_obj,
timeout: Union[float, Timeout],
optional_params: dict,
acompletion=None,
logging_obj: Any,
model: Optional[str] = None,
messages: Optional[list] = None,
print_verbose: Optional[Callable[..., Any]] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
acompletion: bool = False,
litellm_params=None,
logger_fn=None,
headers: Optional[dict] = None,
timeout: Optional[Union[float, Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
custom_endpoint: Optional[bool] = None,
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False
custom_prompt_dict: dict = {},
client=None,
organization: Optional[str] = None,
custom_llm_provider: Optional[str] = None,
drop_params: Optional[bool] = None,
):
messages = GroqChatConfig()._transform_messages(messages) # type: ignore
if optional_params.get("stream") is True:
fake_stream = GroqChatConfig()._should_fake_stream(optional_params)
else:
fake_stream = False
return super().completion(
model=model,
messages=messages,
api_base=api_base,
custom_llm_provider=custom_llm_provider,
custom_prompt_dict=custom_prompt_dict,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
api_key=api_key,
logging_obj=logging_obj,
optional_params=optional_params,
acompletion=acompletion,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
timeout=timeout,
client=client,
custom_endpoint=custom_endpoint,
streaming_decoder=streaming_decoder,
fake_stream=fake_stream,
model_response,
timeout,
optional_params,
logging_obj,
model,
messages,
print_verbose,
api_key,
api_base,
acompletion,
litellm_params,
logger_fn,
headers,
custom_prompt_dict,
client,
organization,
custom_llm_provider,
drop_params,
)

View file

@ -2,7 +2,6 @@
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""
import json
import types
from typing import List, Optional, Tuple, Union
@ -10,12 +9,7 @@ from pydantic import BaseModel
import litellm
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
@ -105,69 +99,3 @@ class GroqChatConfig(OpenAIGPTConfig):
) # type: ignore
dynamic_api_key = api_key or get_secret_str("GROQ_API_KEY")
return api_base, dynamic_api_key
def _should_fake_stream(self, optional_params: dict) -> bool:
"""
Groq doesn't support 'response_format' while streaming
"""
if optional_params.get("response_format") is not None:
return True
return False
def _create_json_tool_call_for_response_format(
self,
json_schema: dict,
):
"""
Handles creating a tool call for getting responses in JSON format.
Args:
json_schema (Optional[dict]): The JSON schema the response should be in
Returns:
AnthropicMessagesTool: The tool call to send to Anthropic API to get responses in JSON format
"""
return ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name="json_tool_call",
parameters=json_schema,
),
)
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool = False,
) -> dict:
_response_format = non_default_params.get("response_format")
if _response_format is not None and isinstance(_response_format, dict):
json_schema: Optional[dict] = None
if "response_schema" in _response_format:
json_schema = _response_format["response_schema"]
elif "json_schema" in _response_format:
json_schema = _response_format["json_schema"]["schema"]
"""
When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
- You usually want to provide a single tool
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model's perspective.
"""
if json_schema is not None:
_tool_choice = {
"type": "function",
"function": {"name": "json_tool_call"},
}
_tool = self._create_json_tool_call_for_response_format(
json_schema=json_schema,
)
optional_params["tools"] = [_tool]
optional_params["tool_choice"] = _tool_choice
optional_params["json_mode"] = True
non_default_params.pop("response_format", None)
return super().map_openai_params(
non_default_params, optional_params, model, drop_params
)
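One side of this hunk implements Groq JSON mode by turning response_format into a single forced tool call named json_tool_call (Groq cannot stream with response_format, hence the fake-stream check above). A self-contained sketch of that mapping; the function below is an illustration built on plain dicts, not the shipped GroqChatConfig helper:

from typing import Optional

def map_response_format_to_tool(non_default_params: dict, optional_params: dict) -> dict:
    # Mirrors the hunk above: when response_format carries a JSON schema,
    # expose it as a single forced "json_tool_call" tool instead.
    _response_format = non_default_params.get("response_format")
    if _response_format is not None and isinstance(_response_format, dict):
        json_schema: Optional[dict] = None
        if "response_schema" in _response_format:
            json_schema = _response_format["response_schema"]
        elif "json_schema" in _response_format:
            json_schema = _response_format["json_schema"]["schema"]
        if json_schema is not None:
            optional_params["tools"] = [
                {
                    "type": "function",
                    "function": {"name": "json_tool_call", "parameters": json_schema},
                }
            ]
            optional_params["tool_choice"] = {
                "type": "function",
                "function": {"name": "json_tool_call"},
            }
            optional_params["json_mode"] = True
            non_default_params.pop("response_format", None)
    return optional_params

# Example: an OpenAI-style response_format becomes a forced tool call.
params = map_response_format_to_tool(
    {"response_format": {"json_schema": {"schema": {"type": "object"}}}},
    {},
)
# params["tool_choice"] -> {"type": "function", "function": {"name": "json_tool_call"}}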

View file

@ -263,11 +263,7 @@ def get_hf_task_for_model(model: str) -> Tuple[hf_tasks, str]:
return "text-generation-inference", model # default to tgi
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
def get_hf_task_embedding_for_model(
@ -305,9 +301,7 @@ async def async_get_hf_task_embedding_for_model(
task_type, hf_tasks_embeddings
)
)
http_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.HUGGINGFACE,
)
http_client = AsyncHTTPHandler(concurrent_limit=1)
model_info = await http_client.get(url=api_base)
@ -1073,9 +1067,7 @@ class Huggingface(BaseLLM):
)
## COMPLETION CALL
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.HUGGINGFACE,
)
client = AsyncHTTPHandler(concurrent_limit=1)
response = await client.post(api_base, headers=headers, data=json.dumps(data))

View file

@ -14,7 +14,6 @@ import requests # type: ignore
import litellm
from litellm import verbose_logger
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.secret_managers.main import get_secret_str
from litellm.types.utils import ModelInfo, ProviderField, StreamingChoices
@ -165,30 +164,6 @@ class OllamaConfig:
"response_format",
]
def map_openai_params(
self, optional_params: dict, non_default_params: dict
) -> dict:
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["num_predict"] = value
if param == "stream":
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "seed":
optional_params["seed"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "frequency_penalty":
optional_params["repeat_penalty"] = value
if param == "stop":
optional_params["stop"] = value
if param == "response_format" and isinstance(value, dict):
if value["type"] == "json_object":
optional_params["format"] = "json"
return optional_params
def _supports_function_calling(self, ollama_model_info: dict) -> bool:
"""
Check if the 'template' field in the ollama_model_info contains a 'tools' or 'function' key.
@ -457,10 +432,7 @@ def ollama_completion_stream(url, data, logging_obj):
async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
try:
_async_http_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.OLLAMA
)
client = _async_http_client.client
client = httpx.AsyncClient()
async with client.stream(
url=f"{url}", json=data, method="POST", timeout=litellm.request_timeout
) as response:
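The map_openai_params block above translates OpenAI-style options into Ollama's option names (max_tokens -> num_predict, frequency_penalty -> repeat_penalty, response_format json_object -> format "json"). A standalone sketch of the same mapping with a worked example:

def map_openai_to_ollama(non_default_params: dict) -> dict:
    # Mirrors the mapping in the hunk above; stream, temperature, seed,
    # top_p and stop pass through unchanged.
    optional_params: dict = {}
    for param, value in non_default_params.items():
        if param == "max_tokens":
            optional_params["num_predict"] = value
        elif param in ("stream", "temperature", "seed", "top_p", "stop"):
            optional_params[param] = value
        elif param == "frequency_penalty":
            optional_params["repeat_penalty"] = value
        elif param == "response_format" and isinstance(value, dict):
            if value.get("type") == "json_object":
                optional_params["format"] = "json"
    return optional_params

print(map_openai_to_ollama(
    {"max_tokens": 256, "temperature": 0.2, "response_format": {"type": "json_object"}}
))
# {'num_predict': 256, 'temperature': 0.2, 'format': 'json'}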

View file

@ -13,7 +13,6 @@ from pydantic import BaseModel
import litellm
from litellm import verbose_logger
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction
from litellm.types.llms.openai import ChatCompletionAssistantToolCall
from litellm.types.utils import StreamingChoices
@ -446,10 +445,7 @@ async def ollama_async_streaming(
url, api_key, data, model_response, encoding, logging_obj
):
try:
_async_http_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.OLLAMA
)
client = _async_http_client.client
client = httpx.AsyncClient()
_request = {
"url": f"{url}",
"json": data,

View file

@ -17,9 +17,7 @@ import httpx # type: ignore
import requests # type: ignore
import litellm
from litellm import LlmProviders
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
@ -27,19 +25,9 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.llms.databricks.streaming_utils import ModelResponseIterator
from litellm.types.utils import CustomStreamingDecoder, ModelResponse
from litellm.utils import (
Choices,
CustomStreamWrapper,
EmbeddingResponse,
Message,
ProviderConfigManager,
TextCompletionResponse,
Usage,
convert_to_model_response_object,
)
from litellm.utils import CustomStreamWrapper, EmbeddingResponse
from ..common_utils import OpenAILikeBase, OpenAILikeError
from .transformation import OpenAILikeChatConfig
async def make_call(
@ -51,22 +39,16 @@ async def make_call(
messages: list,
logging_obj,
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
):
if client is None:
client = litellm.module_level_aclient
response = await client.post(
api_base, headers=headers, data=data, stream=not fake_stream
)
response = await client.post(api_base, headers=headers, data=data, stream=True)
if streaming_decoder is not None:
completion_stream: Any = streaming_decoder.aiter_bytes(
response.aiter_bytes(chunk_size=1024)
)
elif fake_stream:
model_response = ModelResponse(**response.json())
completion_stream = MockResponseIterator(model_response=model_response)
else:
completion_stream = ModelResponseIterator(
streaming_response=response.aiter_lines(), sync_stream=False
@ -91,12 +73,11 @@ def make_sync_call(
messages: list,
logging_obj,
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
):
if client is None:
client = litellm.module_level_client # Create a new client if none provided
response = client.post(api_base, headers=headers, data=data, stream=not fake_stream)
response = client.post(api_base, headers=headers, data=data, stream=True)
if response.status_code != 200:
raise OpenAILikeError(status_code=response.status_code, message=response.read())
@ -105,9 +86,6 @@ def make_sync_call(
completion_stream = streaming_decoder.iter_bytes(
response.iter_bytes(chunk_size=1024)
)
elif fake_stream:
model_response = ModelResponse(**response.json())
completion_stream = MockResponseIterator(model_response=model_response)
else:
completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True
@ -148,8 +126,8 @@ class OpenAILikeChatHandler(OpenAILikeBase):
headers={},
client: Optional[AsyncHTTPHandler] = None,
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
) -> CustomStreamWrapper:
data["stream"] = True
completion_stream = await make_call(
client=client,
@ -191,7 +169,6 @@ class OpenAILikeChatHandler(OpenAILikeBase):
logger_fn=None,
headers={},
timeout: Optional[Union[float, httpx.Timeout]] = None,
json_mode: bool = False,
) -> ModelResponse:
if timeout is None:
timeout = httpx.Timeout(timeout=600.0, connect=5.0)
@ -204,6 +181,8 @@ class OpenAILikeChatHandler(OpenAILikeBase):
api_base, headers=headers, data=json.dumps(data), timeout=timeout
)
response.raise_for_status()
response_json = response.json()
except httpx.HTTPStatusError as e:
raise OpenAILikeError(
status_code=e.response.status_code,
@ -214,26 +193,22 @@ class OpenAILikeChatHandler(OpenAILikeBase):
except Exception as e:
raise OpenAILikeError(status_code=500, message=str(e))
return OpenAILikeChatConfig._transform_response(
model=model,
response=response,
model_response=model_response,
stream=stream,
logging_obj=logging_obj,
optional_params=optional_params,
api_key=api_key,
data=data,
messages=messages,
print_verbose=print_verbose,
encoding=encoding,
json_mode=json_mode,
custom_llm_provider=custom_llm_provider,
base_model=base_model,
logging_obj.post_call(
input=messages,
api_key="",
original_response=response_json,
additional_args={"complete_input_dict": data},
)
response = ModelResponse(**response_json)
response.model = custom_llm_provider + "/" + (response.model or "")
if base_model is not None:
response._hidden_params["model"] = base_model
return response
def completion(
self,
*,
model: str,
messages: list,
api_base: str,
@ -255,7 +230,6 @@ class OpenAILikeChatHandler(OpenAILikeBase):
streaming_decoder: Optional[
CustomStreamingDecoder
] = None, # if openai-compatible api needs custom stream decoder - e.g. sagemaker
fake_stream: bool = False,
):
custom_endpoint = custom_endpoint or optional_params.pop(
"custom_endpoint", None
@ -269,24 +243,13 @@ class OpenAILikeChatHandler(OpenAILikeBase):
headers=headers,
)
stream: bool = optional_params.pop("stream", None) or False
extra_body = optional_params.pop("extra_body", {})
json_mode = optional_params.pop("json_mode", None)
optional_params.pop("max_retries", None)
if not fake_stream:
optional_params["stream"] = stream
if messages is not None and custom_llm_provider is not None:
provider_config = ProviderConfigManager.get_provider_config(
model=model, provider=LlmProviders(custom_llm_provider)
)
messages = provider_config._transform_messages(messages)
stream: bool = optional_params.get("stream", None) or False
optional_params["stream"] = stream
data = {
"model": model,
"messages": messages,
**optional_params,
**extra_body,
}
## LOGGING
@ -325,7 +288,6 @@ class OpenAILikeChatHandler(OpenAILikeBase):
client=client,
custom_llm_provider=custom_llm_provider,
streaming_decoder=streaming_decoder,
fake_stream=fake_stream,
)
else:
return self.acompletion_function(
@ -365,7 +327,6 @@ class OpenAILikeChatHandler(OpenAILikeBase):
messages=messages,
logging_obj=logging_obj,
streaming_decoder=streaming_decoder,
fake_stream=fake_stream,
)
# completion_stream.__iter__()
return CustomStreamWrapper(
@ -383,6 +344,7 @@ class OpenAILikeChatHandler(OpenAILikeBase):
)
response.raise_for_status()
response_json = response.json()
except httpx.HTTPStatusError as e:
raise OpenAILikeError(
status_code=e.response.status_code,
@ -394,19 +356,17 @@ class OpenAILikeChatHandler(OpenAILikeBase):
)
except Exception as e:
raise OpenAILikeError(status_code=500, message=str(e))
return OpenAILikeChatConfig._transform_response(
model=model,
response=response,
model_response=model_response,
stream=stream,
logging_obj=logging_obj,
optional_params=optional_params,
api_key=api_key,
data=data,
messages=messages,
print_verbose=print_verbose,
encoding=encoding,
json_mode=json_mode,
custom_llm_provider=custom_llm_provider,
base_model=base_model,
logging_obj.post_call(
input=messages,
api_key="",
original_response=response_json,
additional_args={"complete_input_dict": data},
)
response = ModelResponse(**response_json)
response.model = custom_llm_provider + "/" + (response.model or "")
if base_model is not None:
response._hidden_params["model"] = base_model
return response
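One side of this handler threads a fake_stream flag through make_call and make_sync_call: the upstream request is sent without stream=True, and the fully parsed response is replayed as if it were a stream. A rough sketch of that idea; fake_stream_chunks below is an illustrative stand-in for the MockResponseIterator referenced in the hunk, and client is any object with a requests-style post:

from typing import Iterator

def fake_stream_chunks(response_json: dict) -> Iterator[dict]:
    # Illustrative stand-in for MockResponseIterator: the complete
    # (non-streamed) completion is replayed as a single chunk.
    yield response_json

def post_and_maybe_fake_stream(client, api_base: str, headers: dict, data: str, fake_stream: bool):
    # Mirrors make_sync_call above: only ask the upstream API to stream
    # when the stream is not being faked locally.
    response = client.post(api_base, headers=headers, data=data, stream=not fake_stream)
    if fake_stream:
        return fake_stream_chunks(response.json())
    return response.iter_lines()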

View file

@ -1,98 +0,0 @@
"""
OpenAI-like chat completion transformation
"""
import types
from typing import List, Optional, Tuple, Union
import httpx
from pydantic import BaseModel
import litellm
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
from litellm.types.utils import ModelResponse
from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
class OpenAILikeChatConfig(OpenAIGPTConfig):
def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
api_base = api_base or get_secret_str("OPENAI_LIKE_API_BASE") # type: ignore
dynamic_api_key = (
api_key or get_secret_str("OPENAI_LIKE_API_KEY") or ""
) # vllm does not require an api key
return api_base, dynamic_api_key
@staticmethod
def _convert_tool_response_to_message(
message: ChatCompletionAssistantMessage, json_mode: bool
) -> ChatCompletionAssistantMessage:
"""
if json_mode is true, convert the returned tool call response to a content with json str
e.g. input:
{"role": "assistant", "tool_calls": [{"id": "call_5ms4", "type": "function", "function": {"name": "json_tool_call", "arguments": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}}]}
output:
{"role": "assistant", "content": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}
"""
if not json_mode:
return message
_tool_calls = message.get("tool_calls")
if _tool_calls is None or len(_tool_calls) != 1:
return message
message["content"] = _tool_calls[0]["function"].get("arguments") or ""
message["tool_calls"] = None
return message
@staticmethod
def _transform_response(
model: str,
response: httpx.Response,
model_response: ModelResponse,
stream: bool,
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
optional_params: dict,
api_key: Optional[str],
data: Union[dict, str],
messages: List,
print_verbose,
encoding,
json_mode: bool,
custom_llm_provider: str,
base_model: Optional[str],
) -> ModelResponse:
response_json = response.json()
logging_obj.post_call(
input=messages,
api_key="",
original_response=response_json,
additional_args={"complete_input_dict": data},
)
if json_mode:
for choice in response_json["choices"]:
message = OpenAILikeChatConfig._convert_tool_response_to_message(
choice.get("message"), json_mode
)
choice["message"] = message
returned_response = ModelResponse(**response_json)
returned_response.model = (
custom_llm_provider + "/" + (returned_response.model or "")
)
if base_model is not None:
returned_response._hidden_params["model"] = base_model
return returned_response

View file

@ -45,10 +45,7 @@ class OpenAILikeEmbeddingHandler(OpenAILikeBase):
response = None
try:
if client is None or isinstance(client, AsyncHTTPHandler):
self.async_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.OPENAI,
params={"timeout": timeout},
)
self.async_client = AsyncHTTPHandler(timeout=timeout) # type: ignore
else:
self.async_client = client
@ -65,7 +62,7 @@ class OpenAILikeEmbeddingHandler(OpenAILikeBase):
except httpx.HTTPStatusError as e:
raise OpenAILikeError(
status_code=e.response.status_code,
message=e.response.text if e.response else str(e),
message=response.text if response else str(e),
)
except httpx.TimeoutException:
raise OpenAILikeError(

View file

@ -19,10 +19,7 @@ import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm import verbose_logger
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
from .base import BaseLLM
@ -552,10 +549,7 @@ class PredibaseChatCompletion(BaseLLM):
headers={},
) -> ModelResponse:
async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.PREDIBASE,
params={"timeout": timeout},
)
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=timeout))
try:
response = await async_handler.post(
api_base, headers=headers, data=json.dumps(data)

View file

@ -33,7 +33,6 @@ from litellm.types.llms.openai import (
ChatCompletionAssistantToolCall,
ChatCompletionFunctionMessage,
ChatCompletionImageObject,
ChatCompletionImageUrlObject,
ChatCompletionTextObject,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolMessage,
@ -682,27 +681,6 @@ def construct_tool_use_system_prompt(
return tool_use_system_prompt
def convert_generic_image_chunk_to_openai_image_obj(
image_chunk: GenericImageParsingChunk,
) -> str:
"""
Convert a generic image chunk to an OpenAI image object.
Input:
GenericImageParsingChunk(
type="base64",
media_type="image/jpeg",
data="...",
)
Return:
"data:image/jpeg;base64,{base64_image}"
"""
return "data:{};{},{}".format(
image_chunk["media_type"], image_chunk["type"], image_chunk["data"]
)
def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk:
"""
Input:
@ -728,7 +706,6 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing
data=base64_data,
)
except Exception as e:
traceback.print_exc()
if "Error: Unable to fetch image from URL" in str(e):
raise e
raise Exception(
@ -966,10 +943,17 @@ def _gemini_tool_call_invoke_helper(
name = function_call_params.get("name", "") or ""
arguments = function_call_params.get("arguments", "")
arguments_dict = json.loads(arguments)
function_call = litellm.types.llms.vertex_ai.FunctionCall(
name=name,
args=arguments_dict,
)
function_call: Optional[litellm.types.llms.vertex_ai.FunctionCall] = None
for k, v in arguments_dict.items():
inferred_protocol_value = infer_protocol_value(value=v)
_field = litellm.types.llms.vertex_ai.Field(
key=k, value={inferred_protocol_value: v}
)
_fields = litellm.types.llms.vertex_ai.FunctionCallArgs(fields=_field)
function_call = litellm.types.llms.vertex_ai.FunctionCall(
name=name,
args=_fields,
)
return function_call
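The helper above turns an OpenAI tool call's JSON arguments into a Gemini FunctionCall: one side passes the parsed dict straight through as args, the other wraps each argument in a protobuf-style field with an inferred value type, as the docstring in the next hunk illustrates. A self-contained sketch of the field-wrapping idea using plain dicts; infer_protocol_value here is a simplified stand-in for the helper of the same name, and every argument is aggregated into one fields list:

import json

def infer_protocol_value(value):
    # Simplified stand-in for the helper referenced in the hunk above.
    if isinstance(value, bool):
        return "bool_value"
    if isinstance(value, (int, float)):
        return "number_value"
    if isinstance(value, str):
        return "string_value"
    return "struct_value"

def to_gemini_function_call(name: str, arguments: str) -> dict:
    # Each OpenAI tool-call argument becomes a typed protobuf-style field.
    arguments_dict = json.loads(arguments)
    fields = [
        {"key": k, "value": {infer_protocol_value(v): v}}
        for k, v in arguments_dict.items()
    ]
    return {"name": name, "args": {"fields": fields}}

print(to_gemini_function_call(
    "get_current_weather",
    json.dumps({"location": "Boston, MA", "unit": "fahrenheit"}),
))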
@ -994,26 +978,54 @@ def convert_to_gemini_tool_call_invoke(
},
"""
"""
Gemini tool call invokes:
{
"role": "model",
"parts": [
Gemini tool call invokes: - https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/function-calling#submit-api-output
content {
role: "model"
parts [
{
"functionCall": {
"name": "get_current_weather",
"args": {
"unit": "fahrenheit",
"predicted_temperature": 45,
"location": "Boston, MA",
function_call {
name: "get_current_weather"
args {
fields {
key: "unit"
value {
string_value: "fahrenheit"
}
}
fields {
key: "predicted_temperature"
value {
number_value: 45
}
}
fields {
key: "location"
value {
string_value: "Boston, MA"
}
}
}
},
{
function_call {
name: "get_current_weather"
args {
fields {
key: "location"
value {
string_value: "San Francisco"
}
}
}
}
}
}
]
]
}
"""
"""
- json.load the arguments
- json.load the arguments
- iterate through arguments -> create a FunctionCallArgs for each field
"""
try:
_parts_list: List[litellm.types.llms.vertex_ai.PartType] = []
@ -1116,8 +1128,16 @@ def convert_to_gemini_tool_call_result(
# We can't determine from openai message format whether it's a successful or
# error call result so default to the successful result template
inferred_content_value = infer_protocol_value(value=content_str)
_field = litellm.types.llms.vertex_ai.Field(
key="content", value={inferred_content_value: content_str}
)
_function_call_args = litellm.types.llms.vertex_ai.FunctionCallArgs(fields=_field)
_function_response = litellm.types.llms.vertex_ai.FunctionResponse(
name=name, response={"content": content_str} # type: ignore
name=name, response=_function_call_args # type: ignore
)
_part = litellm.types.llms.vertex_ai.PartType(function_response=_function_response)
@ -1159,44 +1179,15 @@ def convert_to_anthropic_tool_result(
]
}
"""
anthropic_content: Union[
str,
List[Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]],
] = ""
content_str: str = ""
if isinstance(message["content"], str):
anthropic_content = message["content"]
content_str = message["content"]
elif isinstance(message["content"], List):
content_list = message["content"]
anthropic_content_list: List[
Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
] = []
for content in content_list:
if content["type"] == "text":
anthropic_content_list.append(
AnthropicMessagesToolResultContent(
type="text",
text=content["text"],
)
)
elif content["type"] == "image_url":
if isinstance(content["image_url"], str):
image_chunk = convert_to_anthropic_image_obj(content["image_url"])
else:
image_chunk = convert_to_anthropic_image_obj(
content["image_url"]["url"]
)
anthropic_content_list.append(
AnthropicMessagesImageParam(
type="image",
source=AnthropicContentParamSource(
type="base64",
media_type=image_chunk["media_type"],
data=image_chunk["data"],
),
)
)
content_str += content["text"]
anthropic_content = anthropic_content_list
anthropic_tool_result: Optional[AnthropicMessagesToolResultParam] = None
## PROMPT CACHING CHECK ##
cache_control = message.get("cache_control", None)
@ -1207,14 +1198,14 @@ def convert_to_anthropic_tool_result(
# We can't determine from openai message format whether it's a successful or
# error call result so default to the successful result template
anthropic_tool_result = AnthropicMessagesToolResultParam(
type="tool_result", tool_use_id=tool_call_id, content=anthropic_content
type="tool_result", tool_use_id=tool_call_id, content=content_str
)
if message["role"] == "function":
function_message: ChatCompletionFunctionMessage = message
tool_call_id = function_message.get("tool_call_id") or str(uuid.uuid4())
anthropic_tool_result = AnthropicMessagesToolResultParam(
type="tool_result", tool_use_id=tool_call_id, content=anthropic_content
type="tool_result", tool_use_id=tool_call_id, content=content_str
)
if anthropic_tool_result is None:
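The convert_to_anthropic_tool_result hunk above differs in how tool output is packaged: one side keeps a list of text and base64 image blocks, the other concatenates the text parts into a single string. A small sketch of the flattened form, using the tool_result shape shown in the hunk:

import uuid

def to_anthropic_tool_result(tool_call_id: str, content) -> dict:
    # Flattened variant from the hunk above: list content is reduced to a string,
    # so image blocks are dropped and only text parts survive.
    content_str = ""
    if isinstance(content, str):
        content_str = content
    elif isinstance(content, list):
        for part in content:
            if part.get("type") == "text":
                content_str += part["text"]
    return {
        "type": "tool_result",
        "tool_use_id": tool_call_id or str(uuid.uuid4()),
        "content": content_str,
    }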

View file

@ -9,10 +9,7 @@ import httpx # type: ignore
import requests # type: ignore
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from .prompt_templates.factory import custom_prompt, prompt_factory
@ -328,7 +325,7 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
async def async_handle_prediction_response_streaming(
prediction_url, api_token, print_verbose
):
http_handler = get_async_httpx_client(llm_provider=litellm.LlmProviders.REPLICATE)
http_handler = AsyncHTTPHandler(concurrent_limit=1)
previous_output = ""
output_string = ""
@ -563,9 +560,7 @@ async def async_completion(
logging_obj,
print_verbose,
) -> Union[ModelResponse, CustomStreamWrapper]:
http_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.REPLICATE,
)
http_handler = AsyncHTTPHandler(concurrent_limit=1)
prediction_url = await async_start_prediction(
version_id,
input_data,

View file

@ -18,10 +18,7 @@ import litellm
from litellm import verbose_logger
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.llms.databricks import GenericStreamingChunk
from litellm.utils import (
Choices,
@ -482,9 +479,8 @@ class CodestralTextCompletion(BaseLLM):
headers={},
) -> TextCompletionResponse:
async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.TEXT_COMPLETION_CODESTRAL,
params={"timeout": timeout},
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=timeout), concurrent_limit=1
)
try:

View file

@ -8,11 +8,7 @@ import httpx # type: ignore
import requests # type: ignore
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.utils import (
Choices,
CustomStreamWrapper,
@ -54,8 +50,8 @@ class TritonChatCompletion(BaseLLM):
logging_obj: Any,
api_key: Optional[str] = None,
) -> EmbeddingResponse:
async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.TRITON, params={"timeout": 600.0}
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
response = await async_handler.post(url=api_base, data=json.dumps(data))
@ -265,9 +261,7 @@ class TritonChatCompletion(BaseLLM):
model_response,
type_of_model,
) -> ModelResponse:
handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.TRITON, params={"timeout": 600.0}
)
handler = AsyncHTTPHandler()
if stream:
return self._ahandle_stream( # type: ignore
handler, api_base, data_for_triton, model, logging_obj

View file

@ -107,10 +107,6 @@ def _get_image_mime_type_from_url(url: str) -> Optional[str]:
return "image/png"
elif url.endswith(".webp"):
return "image/webp"
elif url.endswith(".mp4"):
return "video/mp4"
elif url.endswith(".pdf"):
return "application/pdf"
return None
@ -298,12 +294,7 @@ def _transform_request_body(
optional_params = {k: v for k, v in optional_params.items() if k not in remove_keys}
try:
if custom_llm_provider == "gemini":
content = litellm.GoogleAIStudioGeminiConfig._transform_messages(
messages=messages
)
else:
content = litellm.VertexGeminiConfig._transform_messages(messages=messages)
content = _gemini_convert_messages_with_history(messages=messages)
tools: Optional[Tools] = optional_params.pop("tools", None)
tool_choice: Optional[ToolConfig] = optional_params.pop("tool_choice", None)
safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop(

View file

@ -35,12 +35,7 @@ from litellm.llms.custom_httpx.http_handler import (
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.prompt_templates.factory import (
convert_generic_image_chunk_to_openai_image_obj,
convert_to_anthropic_image_obj,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionResponseMessage,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
@ -83,8 +78,6 @@ from ..common_utils import (
)
from ..vertex_llm_base import VertexBase
from .transformation import (
_gemini_convert_messages_with_history,
_process_gemini_image,
async_transform_request_body,
set_headers,
sync_transform_request_body,
@ -919,10 +912,6 @@ class VertexGeminiConfig:
return model_response
@staticmethod
def _transform_messages(messages: List[AllMessageValues]) -> List[ContentType]:
return _gemini_convert_messages_with_history(messages=messages)
class GoogleAIStudioGeminiConfig(
VertexGeminiConfig
@ -1026,32 +1015,6 @@ class GoogleAIStudioGeminiConfig(
model, non_default_params, optional_params, drop_params
)
@staticmethod
def _transform_messages(messages: List[AllMessageValues]) -> List[ContentType]:
"""
Google AI Studio Gemini does not support image urls in messages.
"""
for message in messages:
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
_parts: List[PartType] = []
for element in _message_content:
if element.get("type") == "image_url":
img_element = element
_image_url: Optional[str] = None
if isinstance(img_element.get("image_url"), dict):
_image_url = img_element["image_url"].get("url") # type: ignore
else:
_image_url = img_element.get("image_url") # type: ignore
if _image_url and "https://" in _image_url:
image_obj = convert_to_anthropic_image_obj(_image_url)
img_element["image_url"] = ( # type: ignore
convert_generic_image_chunk_to_openai_image_obj(
image_obj
)
)
return _gemini_convert_messages_with_history(messages=messages)
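The _transform_messages override above works around the note in its docstring that Google AI Studio Gemini does not accept remote image URLs: any https image is downloaded and inlined as a base64 data URI before the shared Gemini transform runs. A simplified sketch of that idea; the shipped code goes through convert_to_anthropic_image_obj and convert_generic_image_chunk_to_openai_image_obj rather than this hypothetical helper:

import base64

import httpx

def inline_https_image(image_url: str) -> str:
    # Google AI Studio cannot fetch remote image URLs, so https images are
    # downloaded and rewritten as "data:<media_type>;base64,<data>" URIs.
    if not image_url.startswith("https://"):
        return image_url  # already a data URI or other supported reference
    resp = httpx.get(image_url)
    media_type = resp.headers.get("content-type", "image/jpeg")
    b64 = base64.b64encode(resp.content).decode("utf-8")
    return f"data:{media_type};base64,{b64}"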
async def make_call(
client: Optional[AsyncHTTPHandler],
@ -1063,9 +1026,7 @@ async def make_call(
logging_obj,
):
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
)
client = AsyncHTTPHandler() # Create a new client if none provided
try:
response = await client.post(api_base, headers=headers, data=data, stream=True)

View file

@ -7,13 +7,8 @@ from typing import Any, List, Literal, Optional, Union
import httpx
import litellm
from litellm import EmbeddingResponse
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.openai import EmbeddingInput
from litellm.types.llms.vertex_ai import (
VertexAIBatchEmbeddingsRequestBody,
@ -155,10 +150,7 @@ class GoogleBatchEmbeddings(VertexLLM):
else:
_params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
async_handler: AsyncHTTPHandler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
params={"timeout": timeout},
)
async_handler: AsyncHTTPHandler = AsyncHTTPHandler(**_params) # type: ignore
else:
async_handler = client # type: ignore

View file

@ -5,11 +5,7 @@ import httpx
from openai.types.image import Image
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,
)
@ -160,10 +156,7 @@ class VertexImageGeneration(VertexLLM):
else:
_params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0)
self.async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
params={"timeout": timeout},
)
self.async_handler = AsyncHTTPHandler(**_params) # type: ignore
else:
self.async_handler = client # type: ignore

View file

@ -5,11 +5,7 @@ import httpx
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexAIError,
VertexLLM,
@ -176,10 +172,7 @@ class VertexMultimodalEmbedding(VertexLLM):
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
params={"timeout": timeout},
)
client = AsyncHTTPHandler(**_params) # type: ignore
else:
client = client # type: ignore

View file

@ -14,7 +14,6 @@ from pydantic import BaseModel
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
from litellm.llms.prompt_templates.factory import (
convert_to_anthropic_image_obj,
convert_to_gemini_tool_call_invoke,
@ -94,15 +93,11 @@ def _get_client_cache_key(
def _get_client_from_cache(client_cache_key: str):
return litellm.in_memory_llm_clients_cache.get_cache(client_cache_key)
return litellm.in_memory_llm_clients_cache.get(client_cache_key, None)
def _set_client_in_cache(client_cache_key: str, vertex_llm_model: Any):
litellm.in_memory_llm_clients_cache.set_cache(
key=client_cache_key,
value=vertex_llm_model,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
litellm.in_memory_llm_clients_cache[client_cache_key] = vertex_llm_model
def completion( # noqa: PLR0915
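The hunk above alternates between a plain dict (in_memory_llm_clients_cache[key] = model) and a cache object with set_cache/get_cache plus a TTL, so cached Vertex clients can expire instead of living for the life of the process. A toy sketch of such a TTL interface; this is illustrative only, not litellm's cache implementation:

import time
from typing import Any, Dict, Optional, Tuple

class InMemoryTTLCache:
    """Toy stand-in for the set_cache/get_cache interface used in the hunk above."""

    def __init__(self) -> None:
        self._store: Dict[str, Tuple[Any, Optional[float]]] = {}

    def set_cache(self, key: str, value: Any, ttl: Optional[float] = None) -> None:
        expires_at = time.time() + ttl if ttl is not None else None
        self._store[key] = (value, expires_at)

    def get_cache(self, key: str) -> Optional[Any]:
        entry = self._store.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if expires_at is not None and time.time() > expires_at:
            del self._store[key]  # expired clients are dropped and rebuilt
            return None
        return value

cache = InMemoryTTLCache()
cache.set_cache(key="vertex-client-key", value=object(), ttl=3600)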

View file

@ -57,7 +57,6 @@ class WatsonXChatHandler(OpenAILikeChatHandler):
def completion(
self,
*,
model: str,
messages: list,
api_base: str,
@ -76,8 +75,9 @@ class WatsonXChatHandler(OpenAILikeChatHandler):
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
custom_endpoint: Optional[bool] = None,
streaming_decoder: Optional[CustomStreamingDecoder] = None,
fake_stream: bool = False,
streaming_decoder: Optional[
CustomStreamingDecoder
] = None, # if openai-compatible api needs custom stream decoder - e.g. sagemaker
):
api_params = _get_api_params(optional_params, print_verbose=print_verbose)

View file

@ -24,10 +24,7 @@ import httpx # type: ignore
import requests # type: ignore
import litellm
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.watsonx import WatsonXAIEndpoint
from litellm.utils import EmbeddingResponse, ModelResponse, Usage, map_finish_reason
@ -713,13 +710,10 @@ class RequestManager:
if stream:
request_params["stream"] = stream
try:
self.async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.WATSONX,
params={
"timeout": httpx.Timeout(
timeout=request_params.pop("timeout", 600.0), connect=5.0
),
},
self.async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(
timeout=request_params.pop("timeout", 600.0), connect=5.0
),
)
if "json" in request_params:
request_params["data"] = json.dumps(request_params.pop("json", {}))

View file

@ -1495,8 +1495,8 @@ def completion( # type: ignore # noqa: PLR0915
timeout=timeout, # type: ignore
custom_prompt_dict=custom_prompt_dict,
client=client, # pass AsyncOpenAI, OpenAI client
organization=organization,
custom_llm_provider=custom_llm_provider,
encoding=encoding,
)
elif (
model in litellm.open_ai_chat_completion_models
@ -3182,7 +3182,6 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
or custom_llm_provider == "azure_ai"
or custom_llm_provider == "together_ai"
or custom_llm_provider == "openai_like"
or custom_llm_provider == "jina_ai"
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
@ -3440,10 +3439,6 @@ def embedding( # noqa: PLR0915
or litellm.openai_key
or get_secret_str("OPENAI_API_KEY")
)
if extra_headers is not None:
optional_params["extra_headers"] = extra_headers
api_type = "openai"
api_version = None

View file

@ -1745,8 +1745,7 @@
"output_cost_per_token": 0.00000080,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama3-8b-8192": {
"max_tokens": 8192,
@ -1756,74 +1755,7 @@
"output_cost_per_token": 0.00000008,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-1b-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000004,
"output_cost_per_token": 0.00000004,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-3b-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000006,
"output_cost_per_token": 0.00000006,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-11b-text-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000018,
"output_cost_per_token": 0.00000018,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-11b-vision-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000018,
"output_cost_per_token": 0.00000018,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-90b-text-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0000009,
"output_cost_per_token": 0.0000009,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
},
"groq/llama-3.2-90b-vision-preview": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0000009,
"output_cost_per_token": 0.0000009,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama3-70b-8192": {
"max_tokens": 8192,
@ -1833,8 +1765,7 @@
"output_cost_per_token": 0.00000079,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama-3.1-8b-instant": {
"max_tokens": 8192,
@ -1844,8 +1775,7 @@
"output_cost_per_token": 0.00000008,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama-3.1-70b-versatile": {
"max_tokens": 8192,
@ -1855,8 +1785,7 @@
"output_cost_per_token": 0.00000079,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama-3.1-405b-reasoning": {
"max_tokens": 8192,
@ -1866,8 +1795,7 @@
"output_cost_per_token": 0.00000079,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/mixtral-8x7b-32768": {
"max_tokens": 32768,
@ -1877,8 +1805,7 @@
"output_cost_per_token": 0.00000024,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/gemma-7b-it": {
"max_tokens": 8192,
@ -1888,8 +1815,7 @@
"output_cost_per_token": 0.00000007,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/gemma2-9b-it": {
"max_tokens": 8192,
@ -1899,8 +1825,7 @@
"output_cost_per_token": 0.00000020,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama3-groq-70b-8192-tool-use-preview": {
"max_tokens": 8192,
@ -1910,8 +1835,7 @@
"output_cost_per_token": 0.00000089,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"groq/llama3-groq-8b-8192-tool-use-preview": {
"max_tokens": 8192,
@ -1921,8 +1845,7 @@
"output_cost_per_token": 0.00000019,
"litellm_provider": "groq",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true
"supports_function_calling": true
},
"cerebras/llama3.1-8b": {
"max_tokens": 128000,
@ -2032,6 +1955,7 @@
"tool_use_system_prompt_tokens": 264,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_pdf_input": true,
"supports_response_schema": true
},
"claude-3-opus-20240229": {
@ -2097,7 +2021,6 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_response_schema": true
},
@ -3383,8 +3306,6 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-001": {
@ -3408,8 +3329,6 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash": {
@ -3432,8 +3351,6 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-latest": {
@ -3456,32 +3373,6 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0,
"input_cost_per_token_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_token_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0924": {
@ -3504,8 +3395,6 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-exp-1114": {
@ -3528,12 +3417,7 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing",
"metadata": {
"notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
}
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-exp-0827": {
"max_tokens": 8192,
@ -3555,8 +3439,6 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 2000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-8b-exp-0827": {
@ -3578,9 +3460,6 @@
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 4000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro": {
@ -3594,10 +3473,7 @@
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://ai.google.dev/gemini-api/docs/models/gemini"
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-1.5-pro": {
"max_tokens": 8192,
@ -3614,8 +3490,6 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-002": {
@ -3634,8 +3508,6 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-001": {
@ -3654,8 +3526,6 @@
"supports_tool_choice": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0801": {
@ -3673,8 +3543,6 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-exp-0827": {
@ -3692,8 +3560,6 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-pro-latest": {
@ -3711,8 +3577,6 @@
"supports_vision": true,
"supports_tool_choice": true,
"supports_response_schema": true,
"tpm": 4000000,
"rpm": 1000,
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro-vision": {
@ -3727,9 +3591,6 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"rpd": 30000,
"tpm": 120000,
"rpm": 360,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-gemma-2-27b-it": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{93553:function(n,e,t){Promise.resolve().then(t.t.bind(t,63385,23)),Promise.resolve().then(t.t.bind(t,99646,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=93553)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/ea3759ed931c00b2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/8fbba1b67a4788fc.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/ea3759ed931c00b2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-292bb6a83427dbc7.js\",\"131\",\"static/chunks/131-4ee1d633e8928742.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-0c564a21577c9c53.js\",\"777\",\"static/chunks/777-9d9df0b75010dbf9.js\",\"931\",\"static/chunks/app/page-a952da77e0730c7c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/ea3759ed931c00b2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"pDx3dChtj-paUmJExuV6u\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a13477d480030cb3.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a13477d480030cb3.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/8fbba1b67a4788fc.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-4ee1d633e8928742.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-0c564a21577c9c53.js\",\"777\",\"static/chunks/777-80eb84a5285bfa2d.js\",\"931\",\"static/chunks/app/page-413af091866cb902.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/8fbba1b67a4788fc.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"4u3imMIH2UVoP8L-yPCjs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-292bb6a83427dbc7.js","131","static/chunks/131-4ee1d633e8928742.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-0c564a21577c9c53.js","777","static/chunks/777-9d9df0b75010dbf9.js","931","static/chunks/app/page-a952da77e0730c7c.js"],""]
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-4ee1d633e8928742.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-0c564a21577c9c53.js","777","static/chunks/777-80eb84a5285bfa2d.js","931","static/chunks/app/page-413af091866cb902.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["4u3imMIH2UVoP8L-yPCjs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/8fbba1b67a4788fc.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["902","static/chunks/902-292bb6a83427dbc7.js","131","static/chunks/131-4ee1d633e8928742.js","777","static/chunks/777-9d9df0b75010dbf9.js","418","static/chunks/app/model_hub/page-748a83a8e772a56b.js"],""]
3:I[87494,["902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-4ee1d633e8928742.js","777","static/chunks/777-80eb84a5285bfa2d.js","418","static/chunks/app/model_hub/page-748a83a8e772a56b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["4u3imMIH2UVoP8L-yPCjs",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/8fbba1b67a4788fc.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-292bb6a83427dbc7.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-9d9df0b75010dbf9.js","461","static/chunks/app/onboarding/page-884a15d08f8be397.js"],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-58bf23027703b2e8.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-80eb84a5285bfa2d.js","461","static/chunks/app/onboarding/page-884a15d08f8be397.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["4u3imMIH2UVoP8L-yPCjs",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/8fbba1b67a4788fc.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -11,44 +11,10 @@ model_list:
model: vertex_ai/claude-3-5-sonnet-v2
vertex_ai_project: "adroit-crow-413218"
vertex_ai_location: "us-east5"
- model_name: openai-gpt-4o-realtime-audio
litellm_params:
model: openai/gpt-4o-realtime-preview-2024-10-01
api_key: os.environ/OPENAI_API_KEY
- model_name: openai/*
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
- model_name: openai/*
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
access_groups: ["public-openai-models"]
- model_name: openai/gpt-4o
litellm_params:
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY
model_info:
access_groups: ["private-openai-models"]
router_settings:
routing_strategy: usage-based-routing-v2
#redis_url: "os.environ/REDIS_URL"
redis_host: "os.environ/REDIS_HOST"
redis_port: "os.environ/REDIS_PORT"
litellm_settings:
cache: true
cache_params:
type: redis
host: "os.environ/REDIS_HOST"
port: "os.environ/REDIS_PORT"
namespace: "litellm.caching"
ttl: 600
# key_generation_settings:
# team_key_generation:
# allowed_team_member_roles: ["admin"]
# required_params: ["tags"] # require team admins to set tags for cost-tracking when generating a team key
# personal_key_generation: # maps to 'Default Team' on UI
# allowed_user_roles: ["proxy_admin"]
router_settings:
model_group_alias:
"gpt-4-turbo": # Aliased model name
model: "gpt-4" # Actual model name in 'model_list'
hidden: true
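
The aliased model group above can be exercised with any OpenAI-compatible client pointed at the proxy. A minimal sketch, assuming the proxy runs locally on port 4000 with the "sk-1234" master key used elsewhere in this diff; "gpt-4-turbo" is only an alias that router_settings.model_group_alias resolves to the "gpt-4" entry in model_list, and hidden: true is meant to keep the alias out of model listings while still accepting requests for it:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# The request names the alias; the router maps it to the actual "gpt-4" deployment.
resp = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)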

View file

@ -2,7 +2,6 @@ import enum
import json
import os
import sys
import traceback
import uuid
from dataclasses import fields
from datetime import datetime
@ -13,15 +12,7 @@ from typing_extensions import Annotated, TypedDict
from litellm.types.integrations.slack_alerting import AlertType
from litellm.types.router import RouterErrors, UpdateRouterConfig
from litellm.types.utils import (
EmbeddingResponse,
ImageResponse,
ModelResponse,
ProviderField,
StandardCallbackDynamicParams,
StandardPassThroughResponseObject,
TextCompletionResponse,
)
from litellm.types.utils import ProviderField, StandardCallbackDynamicParams
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
@ -891,7 +882,15 @@ class DeleteCustomerRequest(LiteLLMBase):
user_ids: List[str]
class MemberBase(LiteLLMBase):
class Member(LiteLLMBase):
role: Literal[
LitellmUserRoles.ORG_ADMIN,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
# older Member roles
"admin",
"user",
]
user_id: Optional[str] = None
user_email: Optional[str] = None
@ -905,21 +904,6 @@ class MemberBase(LiteLLMBase):
return values
class Member(MemberBase):
role: Literal[
"admin",
"user",
]
class OrgMember(MemberBase):
role: Literal[
LitellmUserRoles.ORG_ADMIN,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
]
class TeamBase(LiteLLMBase):
team_alias: Optional[str] = None
team_id: Optional[str] = None
@ -1985,25 +1969,6 @@ class MemberAddRequest(LiteLLMBase):
super().__init__(**data)
class OrgMemberAddRequest(LiteLLMBase):
member: Union[List[OrgMember], OrgMember]
def __init__(self, **data):
member_data = data.get("member")
if isinstance(member_data, list):
# If member is a list of dictionaries, convert each dictionary to a Member object
members = [OrgMember(**item) for item in member_data]
# Replace member_data with the list of Member objects
data["member"] = members
elif isinstance(member_data, dict):
# If member is a dictionary, convert it to a single Member object
member = OrgMember(**member_data)
# Replace member_data with the single Member object
data["member"] = member
# Call the superclass __init__ method to initialize the object
super().__init__(**data)
class TeamAddMemberResponse(LiteLLM_TeamTable):
updated_users: List[LiteLLM_UserTable]
updated_team_memberships: List[LiteLLM_TeamMembership]
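
The member-add request classes above normalize a "member" payload that may arrive either as a single object or as a list of objects. A self-contained sketch of that pattern, using plain pydantic models as stand-ins for litellm's classes (the role values here are just the two legacy strings, purely for illustration):

from typing import List, Literal, Optional, Union
from pydantic import BaseModel

class Member(BaseModel):
    role: Literal["admin", "user"]
    user_id: Optional[str] = None
    user_email: Optional[str] = None

class MemberAddRequest(BaseModel):
    member: Union[List[Member], Member]

    def __init__(self, **data):
        member_data = data.get("member")
        if isinstance(member_data, list):
            # a list of dictionaries becomes a list of Member objects
            data["member"] = [Member(**item) for item in member_data]
        elif isinstance(member_data, dict):
            # a single dictionary becomes a single Member object
            data["member"] = Member(**member_data)
        super().__init__(**data)

req = MemberAddRequest(member=[{"role": "admin", "user_id": "u-1"}])
print(req.member)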
@ -2052,7 +2017,7 @@ class TeamMemberUpdateResponse(MemberUpdateResponse):
# Organization Member Requests
class OrganizationMemberAddRequest(OrgMemberAddRequest):
class OrganizationMemberAddRequest(MemberAddRequest):
organization_id: str
max_budget_in_organization: Optional[float] = (
None # Users max budget within the organization
@ -2110,7 +2075,6 @@ class SpecialHeaders(enum.Enum):
openai_authorization = "Authorization"
azure_authorization = "API-Key"
anthropic_authorization = "x-api-key"
google_ai_studio_authorization = "x-goog-api-key"
class LitellmDataForBackendLLMCall(TypedDict, total=False):
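
The x-goog-api-key header above is the header Google AI Studio / Gemini clients normally send, so listing it lets the proxy accept it as an auth header on pass-through routes. A hedged sketch of such a call; the /gemini/... path, port, and "sk-1234" key are assumptions for illustration, not confirmed by this diff:

import requests

resp = requests.post(
    "http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent",
    headers={
        "Content-Type": "application/json",
        "x-goog-api-key": "sk-1234",  # LiteLLM virtual key passed via the Google-style header
    },
    json={"contents": [{"parts": [{"text": "Say hello"}]}]},
    timeout=30,
)
print(resp.status_code)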
@ -2169,25 +2133,3 @@ class UserManagementEndpointParamDocStringEnums(str, enum.Enum):
spend_doc_str = """Optional[float] - Amount spent by user. Default is 0. Will be updated by proxy whenever user is used."""
team_id_doc_str = """Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None."""
duration_doc_str = """Optional[str] - Duration for the key auto-created on `/user/new`. Default is None."""
PassThroughEndpointLoggingResultValues = Union[
ModelResponse,
TextCompletionResponse,
ImageResponse,
EmbeddingResponse,
StandardPassThroughResponseObject,
]
class PassThroughEndpointLoggingTypedDict(TypedDict):
result: Optional[PassThroughEndpointLoggingResultValues]
kwargs: dict
LiteLLM_ManagementEndpoint_MetadataFields = [
"model_rpm_limit",
"model_tpm_limit",
"guardrails",
"tags",
]
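
For reference, the pass-through logging payload shown above is just a result-plus-kwargs dictionary. A minimal sketch with plain stand-in types instead of litellm's response classes (the field contents are made up for the demo):

from typing import Optional, TypedDict, Union

class StandardPassThroughResponseObject(TypedDict):
    # stand-in for litellm's model of the same name
    response: str

PassThroughEndpointLoggingResultValues = Union[StandardPassThroughResponseObject, dict]

class PassThroughEndpointLoggingTypedDict(TypedDict):
    result: Optional[PassThroughEndpointLoggingResultValues]
    kwargs: dict

payload: PassThroughEndpointLoggingTypedDict = {
    "result": {"response": '{"candidates": []}'},
    "kwargs": {"endpoint": "/gemini/v1beta/models/gemini-1.5-flash:generateContent"},
}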

View file

@ -60,7 +60,6 @@ def common_checks( # noqa: PLR0915
global_proxy_spend: Optional[float],
general_settings: dict,
route: str,
llm_router: Optional[litellm.Router],
) -> bool:
"""
Common checks across jwt + key-based auth.
@ -98,12 +97,7 @@ def common_checks( # noqa: PLR0915
# this means the team has access to all models on the proxy
pass
# check if the team model is an access_group
elif (
model_in_access_group(
model=_model, team_models=team_object.models, llm_router=llm_router
)
is True
):
elif model_in_access_group(_model, team_object.models) is True:
pass
elif _model and "*" in _model:
pass
@ -379,33 +373,36 @@ async def get_end_user_object(
return None
def model_in_access_group(
model: str, team_models: Optional[List[str]], llm_router: Optional[litellm.Router]
) -> bool:
def model_in_access_group(model: str, team_models: Optional[List[str]]) -> bool:
from collections import defaultdict
from litellm.proxy.proxy_server import llm_router
if team_models is None:
return True
if model in team_models:
return True
access_groups: dict[str, list[str]] = defaultdict(list)
access_groups = defaultdict(list)
if llm_router:
access_groups = llm_router.get_model_access_groups(model_name=model)
access_groups = llm_router.get_model_access_groups()
models_in_current_access_groups = []
if len(access_groups) > 0: # check if token contains any model access groups
for idx, m in enumerate(
team_models
): # loop token models, if any of them are an access group add the access group
if m in access_groups:
return True
# if it is an access group we need to remove it from valid_token.models
models_in_group = access_groups[m]
models_in_current_access_groups.extend(models_in_group)
# Filter out models that are access_groups
filtered_models = [m for m in team_models if m not in access_groups]
filtered_models += models_in_current_access_groups
if model in filtered_models:
return True
return False
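
The access-group check above expands team model entries that name an access group into the concrete models in that group before matching. A self-contained sketch of the expansion variant shown above, with the router's group lookup replaced by a plain dict (an assumption for the demo):

from typing import Dict, List, Optional

def model_in_access_group(model: str, team_models: Optional[List[str]],
                          access_groups: Dict[str, List[str]]) -> bool:
    if team_models is None:
        return True
    if model in team_models:
        return True
    models_in_current_access_groups: List[str] = []
    for m in team_models:
        if m in access_groups:
            # the entry is an access group: pull in the models it contains
            models_in_current_access_groups.extend(access_groups[m])
    # drop the group names themselves, keep concrete models plus the expanded groups
    filtered_models = [m for m in team_models if m not in access_groups]
    filtered_models += models_in_current_access_groups
    return model in filtered_models

groups = {"public-openai-models": ["openai/gpt-4o", "openai/gpt-4o-mini"]}
print(model_in_access_group("openai/gpt-4o", ["public-openai-models"], groups))  # True
print(model_in_access_group("openai/gpt-4o", ["claude-3-5-sonnet"], groups))     # False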
@ -589,63 +586,26 @@ async def _get_team_db_check(team_id: str, prisma_client: PrismaClient):
)
async def _get_team_object_from_db(team_id: str, prisma_client: PrismaClient):
return await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": team_id}
)
async def _get_team_object_from_user_api_key_cache(
async def get_team_object(
team_id: str,
prisma_client: PrismaClient,
prisma_client: Optional[PrismaClient],
user_api_key_cache: DualCache,
last_db_access_time: LimitedSizeOrderedDict,
db_cache_expiry: int,
proxy_logging_obj: Optional[ProxyLogging],
key: str,
parent_otel_span: Optional[Span] = None,
proxy_logging_obj: Optional[ProxyLogging] = None,
check_cache_only: Optional[bool] = None,
) -> LiteLLM_TeamTableCachedObj:
db_access_time_key = key
should_check_db = _should_check_db(
key=db_access_time_key,
last_db_access_time=last_db_access_time,
db_cache_expiry=db_cache_expiry,
)
if should_check_db:
response = await _get_team_db_check(
team_id=team_id, prisma_client=prisma_client
"""
- Check if team id in proxy Team Table
- if valid, return LiteLLM_TeamTable object with defined limits
- if not, then raise an error
"""
if prisma_client is None:
raise Exception(
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
)
else:
response = None
if response is None:
raise Exception
_response = LiteLLM_TeamTableCachedObj(**response.dict())
# save the team object to cache
await _cache_team_object(
team_id=team_id,
team_table=_response,
user_api_key_cache=user_api_key_cache,
proxy_logging_obj=proxy_logging_obj,
)
# save to db access time
# save to db access time
_update_last_db_access_time(
key=db_access_time_key,
value=_response,
last_db_access_time=last_db_access_time,
)
return _response
async def _get_team_object_from_cache(
key: str,
proxy_logging_obj: Optional[ProxyLogging],
user_api_key_cache: DualCache,
parent_otel_span: Optional[Span],
) -> Optional[LiteLLM_TeamTableCachedObj]:
# check if in cache
key = "team_id:{}".format(team_id)
cached_team_obj: Optional[LiteLLM_TeamTableCachedObj] = None
## CHECK REDIS CACHE ##
@ -653,7 +613,6 @@ async def _get_team_object_from_cache(
proxy_logging_obj is not None
and proxy_logging_obj.internal_usage_cache.dual_cache
):
cached_team_obj = (
await proxy_logging_obj.internal_usage_cache.dual_cache.async_get_cache(
key=key, parent_otel_span=parent_otel_span
@ -669,58 +628,47 @@ async def _get_team_object_from_cache(
elif isinstance(cached_team_obj, LiteLLM_TeamTableCachedObj):
return cached_team_obj
return None
async def get_team_object(
team_id: str,
prisma_client: Optional[PrismaClient],
user_api_key_cache: DualCache,
parent_otel_span: Optional[Span] = None,
proxy_logging_obj: Optional[ProxyLogging] = None,
check_cache_only: Optional[bool] = None,
check_db_only: Optional[bool] = None,
) -> LiteLLM_TeamTableCachedObj:
"""
- Check if team id in proxy Team Table
- if valid, return LiteLLM_TeamTable object with defined limits
- if not, then raise an error
"""
if prisma_client is None:
if check_cache_only:
raise Exception(
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
f"Team doesn't exist in cache + check_cache_only=True. Team={team_id}."
)
# check if in cache
key = "team_id:{}".format(team_id)
if not check_db_only:
cached_team_obj = await _get_team_object_from_cache(
key=key,
proxy_logging_obj=proxy_logging_obj,
user_api_key_cache=user_api_key_cache,
parent_otel_span=parent_otel_span,
)
if cached_team_obj is not None:
return cached_team_obj
if check_cache_only:
raise Exception(
f"Team doesn't exist in cache + check_cache_only=True. Team={team_id}."
)
# else, check db
try:
return await _get_team_object_from_user_api_key_cache(
team_id=team_id,
prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache,
proxy_logging_obj=proxy_logging_obj,
db_access_time_key = "team_id:{}".format(team_id)
should_check_db = _should_check_db(
key=db_access_time_key,
last_db_access_time=last_db_access_time,
db_cache_expiry=db_cache_expiry,
key=key,
)
if should_check_db:
response = await _get_team_db_check(
team_id=team_id, prisma_client=prisma_client
)
else:
response = None
if response is None:
raise Exception
_response = LiteLLM_TeamTableCachedObj(**response.dict())
# save the team object to cache
await _cache_team_object(
team_id=team_id,
team_table=_response,
user_api_key_cache=user_api_key_cache,
proxy_logging_obj=proxy_logging_obj,
)
# save to db access time
# save to db access time
_update_last_db_access_time(
key=db_access_time_key,
value=_response,
last_db_access_time=last_db_access_time,
)
return _response
except Exception:
raise Exception(
f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."
@ -877,10 +825,7 @@ async def get_org_object(
async def can_key_call_model(
model: str,
llm_model_list: Optional[list],
valid_token: UserAPIKeyAuth,
llm_router: Optional[litellm.Router],
model: str, llm_model_list: Optional[list], valid_token: UserAPIKeyAuth
) -> Literal[True]:
"""
Checks if token can call a given model
@ -900,29 +845,35 @@ async def can_key_call_model(
)
from collections import defaultdict
from litellm.proxy.proxy_server import llm_router
access_groups = defaultdict(list)
if llm_router:
access_groups = llm_router.get_model_access_groups(model_name=model)
access_groups = llm_router.get_model_access_groups()
if (
len(access_groups) > 0 and llm_router is not None
): # check if token contains any model access groups
models_in_current_access_groups = []
if len(access_groups) > 0: # check if token contains any model access groups
for idx, m in enumerate(
valid_token.models
): # loop token models, if any of them are an access group add the access group
if m in access_groups:
return True
# if it is an access group we need to remove it from valid_token.models
models_in_group = access_groups[m]
models_in_current_access_groups.extend(models_in_group)
# Filter out models that are access_groups
filtered_models = [m for m in valid_token.models if m not in access_groups]
filtered_models += models_in_current_access_groups
verbose_proxy_logger.debug(f"model: {model}; allowed_models: {filtered_models}")
all_model_access: bool = False
if (
len(filtered_models) == 0 and len(valid_token.models) == 0
) or "*" in filtered_models:
len(filtered_models) == 0
or "*" in filtered_models
or "openai/*" in filtered_models
):
all_model_access = True
if model is not None and model not in filtered_models and all_model_access is False:
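
The tail of can_key_call_model above reduces to: a key with no explicit model restrictions, or with a wildcard entry, may call anything; otherwise the requested model must appear in the filtered list. A simplified sketch of that decision only; the real check also performs the access-group expansion and, in one variant above, honors provider wildcards such as "openai/*":

from typing import List

def key_allows_model(model: str, allowed_models: List[str]) -> bool:
    # no restrictions configured, or an explicit "*" wildcard, allows every model
    if len(allowed_models) == 0 or "*" in allowed_models:
        return True
    return model in allowed_models

print(key_allows_model("gpt-4o", []))                      # True: unrestricted key
print(key_allows_model("gpt-4o", ["*"]))                   # True: wildcard
print(key_allows_model("gpt-4o", ["claude-3-5-sonnet"]))   # False: not in the allowed list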

Some files were not shown because too many files have changed in this diff.