forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_dev_11_24_2024
This commit is contained in:
commit
beb0135980
120 changed files with 2726 additions and 1567 deletions
|
@ -807,11 +807,11 @@ jobs:
|
||||||
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||||
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
||||||
- run: ruff check ./litellm
|
- run: ruff check ./litellm
|
||||||
- run: python ./tests/documentation_tests/test_general_setting_keys.py
|
# - run: python ./tests/documentation_tests/test_general_setting_keys.py
|
||||||
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
||||||
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
|
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
|
||||||
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
|
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
|
||||||
- run: python ./tests/documentation_tests/test_env_keys.py
|
# - run: python ./tests/documentation_tests/test_env_keys.py
|
||||||
- run: python ./tests/documentation_tests/test_api_docs.py
|
- run: python ./tests/documentation_tests/test_api_docs.py
|
||||||
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
|
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
|
||||||
- run: helm lint ./deploy/charts/litellm-helm
|
- run: helm lint ./deploy/charts/litellm-helm
|
||||||
|
@ -1191,6 +1191,7 @@ jobs:
|
||||||
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
||||||
-e LITELLM_MASTER_KEY="sk-1234" \
|
-e LITELLM_MASTER_KEY="sk-1234" \
|
||||||
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||||
|
-e GEMINI_API_KEY=$GEMINI_API_KEY \
|
||||||
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
|
-e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
|
||||||
-e LITELLM_LICENSE=$LITELLM_LICENSE \
|
-e LITELLM_LICENSE=$LITELLM_LICENSE \
|
||||||
--name my-app \
|
--name my-app \
|
||||||
|
@ -1228,12 +1229,13 @@ jobs:
|
||||||
name: Install Node.js dependencies
|
name: Install Node.js dependencies
|
||||||
command: |
|
command: |
|
||||||
npm install @google-cloud/vertexai
|
npm install @google-cloud/vertexai
|
||||||
|
npm install @google/generative-ai
|
||||||
npm install --save-dev jest
|
npm install --save-dev jest
|
||||||
|
|
||||||
- run:
|
- run:
|
||||||
name: Run Vertex AI tests
|
name: Run Vertex AI, Google AI Studio Node.js tests
|
||||||
command: |
|
command: |
|
||||||
npx jest tests/pass_through_tests/test_vertex.test.js --verbose
|
npx jest tests/pass_through_tests --verbose
|
||||||
no_output_timeout: 30m
|
no_output_timeout: 30m
|
||||||
- run:
|
- run:
|
||||||
name: Run tests
|
name: Run tests
|
||||||
|
|
|
@ -1,12 +1,21 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
|
||||||
# Google AI Studio SDK
|
# Google AI Studio SDK
|
||||||
|
|
||||||
Pass-through endpoints for Google AI Studio - call provider-specific endpoint, in native format (no translation).
|
Pass-through endpoints for Google AI Studio - call provider-specific endpoint, in native format (no translation).
|
||||||
|
|
||||||
Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini` 🚀
|
Just replace `https://generativelanguage.googleapis.com` with `LITELLM_PROXY_BASE_URL/gemini`
|
||||||
|
|
||||||
#### **Example Usage**
|
#### **Example Usage**
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
|
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-anything' \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{
|
-d '{
|
||||||
"contents": [{
|
"contents": [{
|
||||||
|
@ -17,6 +26,53 @@ http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-any
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="js" label="Google AI Node.js SDK">
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
||||||
|
|
||||||
|
const modelParams = {
|
||||||
|
model: 'gemini-pro',
|
||||||
|
};
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
|
||||||
|
};
|
||||||
|
|
||||||
|
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
|
||||||
|
const model = genAI.getGenerativeModel(modelParams, requestOptions);
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
try {
|
||||||
|
const result = await model.generateContent("Explain how AI works");
|
||||||
|
console.log(result.response.text());
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For streaming responses
|
||||||
|
async function main_streaming() {
|
||||||
|
try {
|
||||||
|
const streamingResult = await model.generateContentStream("Explain how AI works");
|
||||||
|
for await (const chunk of streamingResult.stream) {
|
||||||
|
console.log('Stream chunk:', JSON.stringify(chunk));
|
||||||
|
}
|
||||||
|
const aggregatedResponse = await streamingResult.response;
|
||||||
|
console.log('Aggregated response:', JSON.stringify(aggregatedResponse));
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
|
// main_streaming();
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
Supports **ALL** Google AI Studio Endpoints (including streaming).
|
Supports **ALL** Google AI Studio Endpoints (including streaming).
|
||||||
|
|
||||||
[**See All Google AI Studio Endpoints**](https://ai.google.dev/api)
|
[**See All Google AI Studio Endpoints**](https://ai.google.dev/api)
|
||||||
|
@ -166,14 +222,14 @@ curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Advanced - Use with Virtual Keys
|
## Advanced
|
||||||
|
|
||||||
Pre-requisites
|
Pre-requisites
|
||||||
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
|
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
|
||||||
|
|
||||||
Use this, to avoid giving developers the raw Google AI Studio key, but still letting them use Google AI Studio endpoints.
|
Use this, to avoid giving developers the raw Google AI Studio key, but still letting them use Google AI Studio endpoints.
|
||||||
|
|
||||||
### Usage
|
### Use with Virtual Keys
|
||||||
|
|
||||||
1. Setup environment
|
1. Setup environment
|
||||||
|
|
||||||
|
@ -221,3 +277,65 @@ http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:countTokens?key=sk-123
|
||||||
}]
|
}]
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Send `tags` in request headers
|
||||||
|
|
||||||
|
Use this if you want `tags` to be tracked in the LiteLLM DB and on logging callbacks.
|
||||||
|
|
||||||
|
Pass tags in request headers as a comma separated list. In the example below the following tags will be tracked
|
||||||
|
|
||||||
|
```
|
||||||
|
tags: ["gemini-js-sdk", "pass-through-endpoint"]
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent?key=sk-anything' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'tags: gemini-js-sdk,pass-through-endpoint' \
|
||||||
|
-d '{
|
||||||
|
"contents": [{
|
||||||
|
"parts":[{
|
||||||
|
"text": "The quick brown fox jumps over the lazy dog."
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="js" label="Google AI Node.js SDK">
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
||||||
|
|
||||||
|
const modelParams = {
|
||||||
|
model: 'gemini-pro',
|
||||||
|
};
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
baseUrl: 'http://localhost:4000/gemini', // http://<proxy-base-url>/gemini
|
||||||
|
customHeaders: {
|
||||||
|
"tags": "gemini-js-sdk,pass-through-endpoint"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const genAI = new GoogleGenerativeAI("sk-1234");
|
||||||
|
const model = genAI.getGenerativeModel(modelParams, requestOptions);
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
try {
|
||||||
|
const result = await model.generateContent("Explain how AI works");
|
||||||
|
console.log(result.response.text());
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
File diff suppressed because it is too large
Load diff
481
docs/my-website/docs/proxy/config_settings.md
Normal file
481
docs/my-website/docs/proxy/config_settings.md
Normal file
|
@ -0,0 +1,481 @@
|
||||||
|
# All settings
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
environment_variables: {}
|
||||||
|
|
||||||
|
model_list:
|
||||||
|
- model_name: string
|
||||||
|
litellm_params: {}
|
||||||
|
model_info:
|
||||||
|
id: string
|
||||||
|
mode: embedding
|
||||||
|
input_cost_per_token: 0
|
||||||
|
output_cost_per_token: 0
|
||||||
|
max_tokens: 2048
|
||||||
|
base_model: gpt-4-1106-preview
|
||||||
|
additionalProp1: {}
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
# Logging/Callback settings
|
||||||
|
success_callback: ["langfuse"] # list of success callbacks
|
||||||
|
failure_callback: ["sentry"] # list of failure callbacks
|
||||||
|
callbacks: ["otel"] # list of callbacks - runs on success and failure
|
||||||
|
service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
|
||||||
|
turn_off_message_logging: boolean # prevent the messages and responses from being logged to on your callbacks, but request metadata will still be logged.
|
||||||
|
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
|
||||||
|
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
|
||||||
|
|
||||||
|
# Networking settings
|
||||||
|
request_timeout: 10 # (int) llm requesttimeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
|
||||||
|
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
|
||||||
|
|
||||||
|
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
|
||||||
|
json_logs: boolean # if true, logs will be in json format
|
||||||
|
|
||||||
|
# Fallbacks, reliability
|
||||||
|
default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad.
|
||||||
|
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors
|
||||||
|
context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Caching settings
|
||||||
|
cache: true
|
||||||
|
cache_params: # set cache params for redis
|
||||||
|
type: redis # type of cache to initialize
|
||||||
|
|
||||||
|
# Optional - Redis Settings
|
||||||
|
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
|
||||||
|
port: 6379 # The port number for the Redis cache. Required if type is "redis".
|
||||||
|
password: "your_password" # The password for the Redis cache. Required if type is "redis".
|
||||||
|
namespace: "litellm.caching.caching" # namespace for redis cache
|
||||||
|
|
||||||
|
# Optional - Redis Cluster Settings
|
||||||
|
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
|
||||||
|
|
||||||
|
# Optional - Redis Sentinel Settings
|
||||||
|
service_name: "mymaster"
|
||||||
|
sentinel_nodes: [["localhost", 26379]]
|
||||||
|
|
||||||
|
# Optional - Qdrant Semantic Cache Settings
|
||||||
|
qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
|
||||||
|
qdrant_collection_name: test_collection
|
||||||
|
qdrant_quantization_config: binary
|
||||||
|
similarity_threshold: 0.8 # similarity threshold for semantic cache
|
||||||
|
|
||||||
|
# Optional - S3 Cache Settings
|
||||||
|
s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
|
||||||
|
s3_region_name: us-west-2 # AWS Region Name for S3
|
||||||
|
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
|
||||||
|
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
|
||||||
|
s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 bucket
|
||||||
|
|
||||||
|
# Common Cache settings
|
||||||
|
# Optional - Supported call types for caching
|
||||||
|
supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
|
||||||
|
# /chat/completions, /completions, /embeddings, /audio/transcriptions
|
||||||
|
mode: default_off # if default_off, you need to opt in to caching on a per call basis
|
||||||
|
ttl: 600 # ttl for caching
|
||||||
|
|
||||||
|
|
||||||
|
callback_settings:
|
||||||
|
otel:
|
||||||
|
message_logging: boolean # OTEL logging callback specific settings
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
completion_model: string
|
||||||
|
disable_spend_logs: boolean # turn off writing each transaction to the db
|
||||||
|
disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
|
||||||
|
disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
|
||||||
|
disable_reset_budget: boolean # turn off reset budget scheduled task
|
||||||
|
disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
|
||||||
|
enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
|
||||||
|
enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
|
||||||
|
allowed_routes: ["route1", "route2"] # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
|
||||||
|
key_management_system: google_kms # either google_kms or azure_kms
|
||||||
|
master_key: string
|
||||||
|
|
||||||
|
# Database Settings
|
||||||
|
database_url: string
|
||||||
|
database_connection_pool_limit: 0 # default 100
|
||||||
|
database_connection_timeout: 0 # default 60s
|
||||||
|
allow_requests_on_db_unavailable: boolean # if true, will allow requests that can not connect to the DB to verify Virtual Key to still work
|
||||||
|
|
||||||
|
custom_auth: string
|
||||||
|
max_parallel_requests: 0 # the max parallel requests allowed per deployment
|
||||||
|
global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
|
||||||
|
infer_model_from_keys: true
|
||||||
|
background_health_checks: true
|
||||||
|
health_check_interval: 300
|
||||||
|
alerting: ["slack", "email"]
|
||||||
|
alerting_threshold: 0
|
||||||
|
use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints
|
||||||
|
```
|
||||||
|
|
||||||
|
### litellm_settings - Reference
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
|------|------|-------------|
|
||||||
|
| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
||||||
|
| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
||||||
|
| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
||||||
|
| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. datadog, prometheus) [Doc Metrics](prometheus) |
|
||||||
|
| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) |
|
||||||
|
| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider |
|
||||||
|
| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.|
|
||||||
|
| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) |
|
||||||
|
| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) |
|
||||||
|
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
|
||||||
|
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
|
||||||
|
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
|
||||||
|
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
|
||||||
|
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
|
||||||
|
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
|
||||||
|
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
|
||||||
|
| cache | boolean | If true, enables caching. [Further docs](./caching) |
|
||||||
|
| cache_params | object | Parameters for the cache. [Further docs](./caching) |
|
||||||
|
| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Furher docs](./caching) |
|
||||||
|
| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". |
|
||||||
|
| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". |
|
||||||
|
| cache_params.password | string | The password for the Redis cache. Required if type is "redis". |
|
||||||
|
| cache_params.namespace | string | The namespace for the Redis cache. |
|
||||||
|
| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) |
|
||||||
|
| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) |
|
||||||
|
| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) |
|
||||||
|
| cache_params.ttl | integer | The time (in seconds) to store entries in cache. |
|
||||||
|
| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. |
|
||||||
|
| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. |
|
||||||
|
| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. |
|
||||||
|
| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. |
|
||||||
|
| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the semantic cache. |
|
||||||
|
| cache_params.s3_region_name | string | The region name for the S3 bucket. |
|
||||||
|
| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. |
|
||||||
|
| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. |
|
||||||
|
| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. |
|
||||||
|
| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) |
|
||||||
|
| cache_params.mode | string | The mode of the cache. [Further docs](./caching) |
|
||||||
|
| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
|
||||||
|
| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
|
||||||
|
|
||||||
|
### general_settings - Reference
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
|------|------|-------------|
|
||||||
|
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
|
||||||
|
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
|
||||||
|
| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) |
|
||||||
|
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached |
|
||||||
|
| disable_reset_budget | boolean | If true, turns off reset budget scheduled task |
|
||||||
|
| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db |
|
||||||
|
| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) |
|
||||||
|
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)|
|
||||||
|
| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)|
|
||||||
|
| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) |
|
||||||
|
| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) |
|
||||||
|
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
||||||
|
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
||||||
|
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
||||||
|
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
||||||
|
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
||||||
|
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
||||||
|
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
||||||
|
| infer_model_from_keys | boolean | If true, infers the model from the provided keys |
|
||||||
|
| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) |
|
||||||
|
| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) |
|
||||||
|
| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) |
|
||||||
|
| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) |
|
||||||
|
| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) |
|
||||||
|
| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). [Doc on health checks](health) |
|
||||||
|
| public_routes | List[str] | (Enterprise Feature) Control list of public routes |
|
||||||
|
| alert_types | List[str] | Control list of alert types to send to slack (Doc on alert types)[./alerting.md] |
|
||||||
|
| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
|
||||||
|
| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
|
||||||
|
| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
|
||||||
|
| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys (Doc on service accounts)[./service_accounts.md] |
|
||||||
|
| image_generation_model | str | The default model to use for image generation - ignores model set in request |
|
||||||
|
| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) |
|
||||||
|
| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. |
|
||||||
|
| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. |
|
||||||
|
| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** |
|
||||||
|
| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** |
|
||||||
|
| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** |
|
||||||
|
| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) |
|
||||||
|
| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) |
|
||||||
|
| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. |
|
||||||
|
| embedding_model | str | The default model to use for embeddings - ignores model set in request |
|
||||||
|
| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). |
|
||||||
|
| alert_to_webhook_url | Dict[str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) |
|
||||||
|
| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) |
|
||||||
|
| allow_user_auth | boolean | (Deprecated) old approach for user authentication. |
|
||||||
|
| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. |
|
||||||
|
| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB |
|
||||||
|
| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) |
|
||||||
|
| moderation_model | str | The default model to use for moderation. |
|
||||||
|
| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) |
|
||||||
|
| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) |
|
||||||
|
| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. [Doc on admin only routes](./enterprise#control-available-public-private-routes) |
|
||||||
|
| use_azure_key_vault | boolean | If true, load keys from azure key vault |
|
||||||
|
| use_google_kms | boolean | If true, load keys from google kms |
|
||||||
|
| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) |
|
||||||
|
| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) |
|
||||||
|
| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) |
|
||||||
|
| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) |
|
||||||
|
| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
|
||||||
|
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
|
||||||
|
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
|
||||||
|
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
|
||||||
|
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
|
||||||
|
|
||||||
|
### router_settings - Reference
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
Most values can also be set via `litellm_settings`. If you see overlapping values, settings on `router_settings` will override those on `litellm_settings`.
|
||||||
|
:::
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
router_settings:
|
||||||
|
routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
|
||||||
|
redis_host: <your-redis-host> # string
|
||||||
|
redis_password: <your-redis-password> # string
|
||||||
|
redis_port: <your-redis-port> # string
|
||||||
|
enable_pre_call_check: true # bool - Before call is made check if a call is within model context window
|
||||||
|
allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
|
||||||
|
cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
|
||||||
|
disable_cooldowns: True # bool - Disable cooldowns for all models
|
||||||
|
enable_tag_filtering: True # bool - Use tag based routing for requests
|
||||||
|
retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
|
||||||
|
"AuthenticationErrorRetries": 3,
|
||||||
|
"TimeoutErrorRetries": 3,
|
||||||
|
"RateLimitErrorRetries": 3,
|
||||||
|
"ContentPolicyViolationErrorRetries": 4,
|
||||||
|
"InternalServerErrorRetries": 4
|
||||||
|
}
|
||||||
|
allowed_fails_policy: {
|
||||||
|
"BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
|
||||||
|
"AuthenticationErrorAllowedFails": 10, # int
|
||||||
|
"TimeoutErrorAllowedFails": 12, # int
|
||||||
|
"RateLimitErrorAllowedFails": 10000, # int
|
||||||
|
"ContentPolicyViolationErrorAllowedFails": 15, # int
|
||||||
|
"InternalServerErrorAllowedFails": 20, # int
|
||||||
|
}
|
||||||
|
content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
|
||||||
|
fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
|
||||||
|
```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
|------|------|-------------|
|
||||||
|
| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) |
|
||||||
|
| redis_host | string | The host address for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
|
||||||
|
| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
|
||||||
|
| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**|
|
||||||
|
| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
|
||||||
|
| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) |
|
||||||
|
| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) |
|
||||||
|
| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) |
|
||||||
|
| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. |
|
||||||
|
| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) |
|
||||||
|
| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) |
|
||||||
|
| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) |
|
||||||
|
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) |
|
||||||
|
|
||||||
|
|
||||||
|
### environment variables - Reference
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions
|
||||||
|
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions
|
||||||
|
| AISPEND_ACCOUNT_ID | Account ID for AI Spend
|
||||||
|
| AISPEND_API_KEY | API Key for AI Spend
|
||||||
|
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access
|
||||||
|
| ARIZE_API_KEY | API key for Arize platform integration
|
||||||
|
| ARIZE_SPACE_KEY | Space key for Arize platform
|
||||||
|
| ARGILLA_BATCH_SIZE | Batch size for Argilla logging
|
||||||
|
| ARGILLA_API_KEY | API key for Argilla platform
|
||||||
|
| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging
|
||||||
|
| ARGILLA_DATASET_NAME | Dataset name for Argilla logging
|
||||||
|
| ARGILLA_BASE_URL | Base URL for Argilla service
|
||||||
|
| ATHINA_API_KEY | API key for Athina service
|
||||||
|
| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key)
|
||||||
|
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
|
||||||
|
| AWS_PROFILE_NAME | AWS CLI profile name to be used
|
||||||
|
| AWS_REGION_NAME | Default AWS region for service interactions
|
||||||
|
| AWS_ROLE_NAME | Role name for AWS IAM usage
|
||||||
|
| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services
|
||||||
|
| AWS_SESSION_NAME | Name for AWS session
|
||||||
|
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
|
||||||
|
| AZURE_API_VERSION | Version of the Azure API being used
|
||||||
|
| AZURE_AUTHORITY_HOST | Azure authority host URL
|
||||||
|
| AZURE_CLIENT_ID | Client ID for Azure services
|
||||||
|
| AZURE_CLIENT_SECRET | Client secret for Azure services
|
||||||
|
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
|
||||||
|
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
|
||||||
|
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
|
||||||
|
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
|
||||||
|
| BRAINTRUST_API_KEY | API key for Braintrust integration
|
||||||
|
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
|
||||||
|
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI
|
||||||
|
| CONFIG_FILE_PATH | File path for configuration file
|
||||||
|
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache
|
||||||
|
| DATABASE_HOST | Hostname for the database server
|
||||||
|
| DATABASE_NAME | Name of the database
|
||||||
|
| DATABASE_PASSWORD | Password for the database user
|
||||||
|
| DATABASE_PORT | Port number for database connection
|
||||||
|
| DATABASE_SCHEMA | Schema name used in the database
|
||||||
|
| DATABASE_URL | Connection URL for the database
|
||||||
|
| DATABASE_USER | Username for database connection
|
||||||
|
| DATABASE_USERNAME | Alias for database user
|
||||||
|
| DATABRICKS_API_BASE | Base URL for Databricks API
|
||||||
|
| DD_BASE_URL | Base URL for Datadog integration
|
||||||
|
| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
|
||||||
|
| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
|
||||||
|
| DD_API_KEY | API key for Datadog integration
|
||||||
|
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
|
||||||
|
| DD_SOURCE | Source identifier for Datadog logs
|
||||||
|
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
|
||||||
|
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
|
||||||
|
| DIRECT_URL | Direct URL for service endpoint
|
||||||
|
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
|
||||||
|
| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates
|
||||||
|
| DOCS_DESCRIPTION | Description text for documentation pages
|
||||||
|
| DOCS_FILTERED | Flag indicating filtered documentation
|
||||||
|
| DOCS_TITLE | Title of the documentation pages
|
||||||
|
| DOCS_URL | The path to the Swagger API documentation. **By default this is "/"**
|
||||||
|
| EMAIL_SUPPORT_CONTACT | Support contact email address
|
||||||
|
| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket
|
||||||
|
| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file
|
||||||
|
| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds**
|
||||||
|
| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. **Default is 2048**
|
||||||
|
| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers
|
||||||
|
| GENERIC_CLIENT_ID | Client ID for generic OAuth providers
|
||||||
|
| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers
|
||||||
|
| GENERIC_CLIENT_STATE | State parameter for generic client authentication
|
||||||
|
| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth
|
||||||
|
| GENERIC_SCOPE | Scope settings for generic OAuth providers
|
||||||
|
| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers
|
||||||
|
| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth
|
||||||
|
| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth
|
||||||
|
| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth
|
||||||
|
| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth
|
||||||
|
| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth
|
||||||
|
| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider
|
||||||
|
| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role
|
||||||
|
| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth
|
||||||
|
| GALILEO_BASE_URL | Base URL for Galileo platform
|
||||||
|
| GALILEO_PASSWORD | Password for Galileo authentication
|
||||||
|
| GALILEO_PROJECT_ID | Project ID for Galileo usage
|
||||||
|
| GALILEO_USERNAME | Username for Galileo authentication
|
||||||
|
| GREENSCALE_API_KEY | API key for Greenscale service
|
||||||
|
| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service
|
||||||
|
| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file
|
||||||
|
| GOOGLE_CLIENT_ID | Client ID for Google OAuth
|
||||||
|
| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth
|
||||||
|
| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS
|
||||||
|
| HF_API_BASE | Base URL for Hugging Face API
|
||||||
|
| HELICONE_API_KEY | API key for Helicone service
|
||||||
|
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
|
||||||
|
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
|
||||||
|
| JSON_LOGS | Enable JSON formatted logging
|
||||||
|
| JWT_AUDIENCE | Expected audience for JWT tokens
|
||||||
|
| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification
|
||||||
|
| LAGO_API_BASE | Base URL for Lago API
|
||||||
|
| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago
|
||||||
|
| LAGO_API_EVENT_CODE | Event code for Lago API events
|
||||||
|
| LAGO_API_KEY | API key for accessing Lago services
|
||||||
|
| LANGFUSE_DEBUG | Toggle debug mode for Langfuse
|
||||||
|
| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs
|
||||||
|
| LANGFUSE_HOST | Host URL for Langfuse service
|
||||||
|
| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication
|
||||||
|
| LANGFUSE_RELEASE | Release version of Langfuse integration
|
||||||
|
| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication
|
||||||
|
| LANGSMITH_API_KEY | API key for Langsmith platform
|
||||||
|
| LANGSMITH_BASE_URL | Base URL for Langsmith service
|
||||||
|
| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith
|
||||||
|
| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run
|
||||||
|
| LANGSMITH_PROJECT | Project name for Langsmith integration
|
||||||
|
| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging
|
||||||
|
| LANGTRACE_API_KEY | API key for Langtrace service
|
||||||
|
| LITERAL_API_KEY | API key for Literal integration
|
||||||
|
| LITERAL_API_URL | API URL for Literal service
|
||||||
|
| LITERAL_BATCH_SIZE | Batch size for Literal operations
|
||||||
|
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
|
||||||
|
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
|
||||||
|
| LITELLM_EMAIL | Email associated with LiteLLM account
|
||||||
|
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
|
||||||
|
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
|
||||||
|
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
|
||||||
|
| LITELLM_LICENSE | License key for LiteLLM usage
|
||||||
|
| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM
|
||||||
|
| LITELLM_LOG | Enable detailed logging for LiteLLM
|
||||||
|
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development)
|
||||||
|
| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM
|
||||||
|
| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM
|
||||||
|
| LITELLM_TOKEN | Access token for LiteLLM integration
|
||||||
|
| LOGFIRE_TOKEN | Token for Logfire logging service
|
||||||
|
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
|
||||||
|
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
|
||||||
|
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
|
||||||
|
| NO_DOCS | Flag to disable documentation generation
|
||||||
|
| NO_PROXY | List of addresses to bypass proxy
|
||||||
|
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
|
||||||
|
| OPENAI_API_BASE | Base URL for OpenAI API
|
||||||
|
| OPENAI_API_KEY | API key for OpenAI services
|
||||||
|
| OPENAI_ORGANIZATION | Organization identifier for OpenAI
|
||||||
|
| OPENID_BASE_URL | Base URL for OpenID Connect services
|
||||||
|
| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication
|
||||||
|
| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication
|
||||||
|
| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration
|
||||||
|
| OPENMETER_API_KEY | API key for OpenMeter services
|
||||||
|
| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter
|
||||||
|
| OTEL_ENDPOINT | OpenTelemetry endpoint for traces
|
||||||
|
| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry
|
||||||
|
| OTEL_EXPORTER | Exporter type for OpenTelemetry
|
||||||
|
| OTEL_HEADERS | Headers for OpenTelemetry requests
|
||||||
|
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
|
||||||
|
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
|
||||||
|
| PREDIBASE_API_BASE | Base URL for Predibase API
|
||||||
|
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
|
||||||
|
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
|
||||||
|
| PROMETHEUS_URL | URL for Prometheus service
|
||||||
|
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
|
||||||
|
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
||||||
|
| PROXY_BASE_URL | Base URL for proxy service
|
||||||
|
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
||||||
|
| PROXY_MASTER_KEY | Master key for proxy authentication
|
||||||
|
| QDRANT_API_BASE | Base URL for Qdrant API
|
||||||
|
| QDRANT_API_KEY | API key for Qdrant service
|
||||||
|
| QDRANT_URL | Connection URL for Qdrant database
|
||||||
|
| REDIS_HOST | Hostname for Redis server
|
||||||
|
| REDIS_PASSWORD | Password for Redis service
|
||||||
|
| REDIS_PORT | Port number for Redis server
|
||||||
|
| REDOC_URL | The path to the Redoc Fast API documentation. **By default this is "/redoc"**
|
||||||
|
| SERVER_ROOT_PATH | Root path for the server application
|
||||||
|
| SET_VERBOSE | Flag to enable verbose logging
|
||||||
|
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
|
||||||
|
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
|
||||||
|
| SMTP_HOST | Hostname for the SMTP server
|
||||||
|
| SMTP_PASSWORD | Password for SMTP authentication
|
||||||
|
| SMTP_PORT | Port number for SMTP server
|
||||||
|
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
|
||||||
|
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
|
||||||
|
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
|
||||||
|
| SMTP_USERNAME | Username for SMTP authentication
|
||||||
|
| SPEND_LOGS_URL | URL for retrieving spend logs
|
||||||
|
| SSL_CERTIFICATE | Path to the SSL certificate file
|
||||||
|
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
|
||||||
|
| SUPABASE_KEY | API key for Supabase service
|
||||||
|
| SUPABASE_URL | Base URL for Supabase instance
|
||||||
|
| TEST_EMAIL_ADDRESS | Email address used for testing purposes
|
||||||
|
| UI_LOGO_PATH | Path to the logo image used in the UI
|
||||||
|
| UI_PASSWORD | Password for accessing the UI
|
||||||
|
| UI_USERNAME | Username for accessing the UI
|
||||||
|
| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse
|
||||||
|
| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service
|
||||||
|
| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication
|
||||||
|
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
||||||
|
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
||||||
|
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
||||||
|
| WEBHOOK_URL | URL for receiving webhooks from external services
|
||||||
|
|
|
@ -597,481 +597,6 @@ general_settings:
|
||||||
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
|
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
|
||||||
```
|
```
|
||||||
|
|
||||||
## **All settings**
|
|
||||||
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
environment_variables: {}
|
|
||||||
|
|
||||||
model_list:
|
|
||||||
- model_name: string
|
|
||||||
litellm_params: {}
|
|
||||||
model_info:
|
|
||||||
id: string
|
|
||||||
mode: embedding
|
|
||||||
input_cost_per_token: 0
|
|
||||||
output_cost_per_token: 0
|
|
||||||
max_tokens: 2048
|
|
||||||
base_model: gpt-4-1106-preview
|
|
||||||
additionalProp1: {}
|
|
||||||
|
|
||||||
litellm_settings:
|
|
||||||
# Logging/Callback settings
|
|
||||||
success_callback: ["langfuse"] # list of success callbacks
|
|
||||||
failure_callback: ["sentry"] # list of failure callbacks
|
|
||||||
callbacks: ["otel"] # list of callbacks - runs on success and failure
|
|
||||||
service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
|
|
||||||
turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks, but request metadata will still be logged.
|
|
||||||
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
|
|
||||||
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
|
|
||||||
|
|
||||||
# Networking settings
|
|
||||||
request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
|
|
||||||
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
|
|
||||||
|
|
||||||
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
|
|
||||||
json_logs: boolean # if true, logs will be in json format
|
|
||||||
|
|
||||||
# Fallbacks, reliability
|
|
||||||
default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad.
|
|
||||||
content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors
|
|
||||||
context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Caching settings
|
|
||||||
cache: true
|
|
||||||
cache_params: # set cache params for redis
|
|
||||||
type: redis # type of cache to initialize
|
|
||||||
|
|
||||||
# Optional - Redis Settings
|
|
||||||
host: "localhost" # The host address for the Redis cache. Required if type is "redis".
|
|
||||||
port: 6379 # The port number for the Redis cache. Required if type is "redis".
|
|
||||||
password: "your_password" # The password for the Redis cache. Required if type is "redis".
|
|
||||||
namespace: "litellm.caching.caching" # namespace for redis cache
|
|
||||||
|
|
||||||
# Optional - Redis Cluster Settings
|
|
||||||
redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
|
|
||||||
|
|
||||||
# Optional - Redis Sentinel Settings
|
|
||||||
service_name: "mymaster"
|
|
||||||
sentinel_nodes: [["localhost", 26379]]
|
|
||||||
|
|
||||||
# Optional - Qdrant Semantic Cache Settings
|
|
||||||
qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
|
|
||||||
qdrant_collection_name: test_collection
|
|
||||||
qdrant_quantization_config: binary
|
|
||||||
similarity_threshold: 0.8 # similarity threshold for semantic cache
|
|
||||||
|
|
||||||
# Optional - S3 Cache Settings
|
|
||||||
s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
|
|
||||||
s3_region_name: us-west-2 # AWS Region Name for S3
|
|
||||||
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
|
|
||||||
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
|
|
||||||
s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 bucket
|
|
||||||
|
|
||||||
# Common Cache settings
|
|
||||||
# Optional - Supported call types for caching
|
|
||||||
supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
|
|
||||||
# /chat/completions, /completions, /embeddings, /audio/transcriptions
|
|
||||||
mode: default_off # if default_off, you need to opt in to caching on a per call basis
|
|
||||||
ttl: 600 # ttl for caching
|
|
||||||
|
|
||||||
|
|
||||||
callback_settings:
|
|
||||||
otel:
|
|
||||||
message_logging: boolean # OTEL logging callback specific settings
|
|
||||||
|
|
||||||
general_settings:
|
|
||||||
completion_model: string
|
|
||||||
disable_spend_logs: boolean # turn off writing each transaction to the db
|
|
||||||
disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
|
|
||||||
disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
|
|
||||||
disable_reset_budget: boolean # turn off reset budget scheduled task
|
|
||||||
disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
|
|
||||||
enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
|
|
||||||
enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
|
|
||||||
allowed_routes: ["route1", "route2"] # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
|
|
||||||
key_management_system: google_kms # either google_kms or azure_kms
|
|
||||||
master_key: string
|
|
||||||
|
|
||||||
# Database Settings
|
|
||||||
database_url: string
|
|
||||||
database_connection_pool_limit: 0 # default 100
|
|
||||||
database_connection_timeout: 0 # default 60s
|
|
||||||
allow_requests_on_db_unavailable: boolean # if true, will allow requests that can not connect to the DB to verify Virtual Key to still work
|
|
||||||
|
|
||||||
custom_auth: string
|
|
||||||
max_parallel_requests: 0 # the max parallel requests allowed per deployment
|
|
||||||
global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
|
|
||||||
infer_model_from_keys: true
|
|
||||||
background_health_checks: true
|
|
||||||
health_check_interval: 300
|
|
||||||
alerting: ["slack", "email"]
|
|
||||||
alerting_threshold: 0
|
|
||||||
use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints
|
|
||||||
```
|
|
||||||
|
|
||||||
### litellm_settings - Reference
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|------|------|-------------|
|
|
||||||
| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
|
||||||
| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
|
||||||
| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) |
|
|
||||||
| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. datadog, prometheus) [Doc Metrics](prometheus) |
|
|
||||||
| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) |
|
|
||||||
| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider |
|
|
||||||
| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.|
|
|
||||||
| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) |
|
|
||||||
| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) |
|
|
||||||
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
|
|
||||||
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
|
|
||||||
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
|
|
||||||
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
|
|
||||||
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
|
|
||||||
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
|
|
||||||
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
|
|
||||||
| cache | boolean | If true, enables caching. [Further docs](./caching) |
|
|
||||||
| cache_params | object | Parameters for the cache. [Further docs](./caching) |
|
|
||||||
| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Further docs](./caching) |
|
|
||||||
| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". |
|
|
||||||
| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". |
|
|
||||||
| cache_params.password | string | The password for the Redis cache. Required if type is "redis". |
|
|
||||||
| cache_params.namespace | string | The namespace for the Redis cache. |
|
|
||||||
| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) |
|
|
||||||
| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) |
|
|
||||||
| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) |
|
|
||||||
| cache_params.ttl | integer | The time (in seconds) to store entries in cache. |
|
|
||||||
| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. |
|
|
||||||
| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. |
|
|
||||||
| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. |
|
|
||||||
| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. |
|
|
||||||
| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the semantic cache. |
|
|
||||||
| cache_params.s3_region_name | string | The region name for the S3 bucket. |
|
|
||||||
| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. |
|
|
||||||
| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. |
|
|
||||||
| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. |
|
|
||||||
| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) |
|
|
||||||
| cache_params.mode | string | The mode of the cache. [Further docs](./caching) |
|
|
||||||
| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
|
|
||||||
| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
|
|
||||||
|
|
||||||
### general_settings - Reference
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|------|------|-------------|
|
|
||||||
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
|
|
||||||
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
|
|
||||||
| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) |
|
|
||||||
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached |
|
|
||||||
| disable_reset_budget | boolean | If true, turns off reset budget scheduled task |
|
|
||||||
| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db |
|
|
||||||
| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) |
|
|
||||||
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)|
|
|
||||||
| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)|
|
|
||||||
| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) |
|
|
||||||
| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) |
|
|
||||||
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
|
||||||
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
|
||||||
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
|
||||||
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
|
||||||
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
|
||||||
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
|
||||||
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
|
||||||
| infer_model_from_keys | boolean | If true, infers the model from the provided keys |
|
|
||||||
| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) |
|
|
||||||
| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) |
|
|
||||||
| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) |
|
|
||||||
| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) |
|
|
||||||
| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) |
|
|
||||||
| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). [Doc on health checks](health) |
|
|
||||||
| public_routes | List[str] | (Enterprise Feature) Control list of public routes |
|
|
||||||
| alert_types | List[str] | Control list of alert types to send to slack [Doc on alert types](./alerting.md) |
|
|
||||||
| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
|
|
||||||
| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
|
|
||||||
| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
|
|
||||||
| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys [Doc on service accounts](./service_accounts.md) |
|
|
||||||
| image_generation_model | str | The default model to use for image generation - ignores model set in request |
|
|
||||||
| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) |
|
|
||||||
| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. |
|
|
||||||
| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. |
|
|
||||||
| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** |
|
|
||||||
| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** |
|
|
||||||
| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** |
|
|
||||||
| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) |
|
|
||||||
| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) |
|
|
||||||
| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. |
|
|
||||||
| embedding_model | str | The default model to use for embeddings - ignores model set in request |
|
|
||||||
| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). |
|
|
||||||
| alert_to_webhook_url | Dict[str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) |
|
|
||||||
| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) |
|
|
||||||
| allow_user_auth | boolean | (Deprecated) old approach for user authentication. |
|
|
||||||
| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. |
|
|
||||||
| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB |
|
|
||||||
| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) |
|
|
||||||
| moderation_model | str | The default model to use for moderation. |
|
|
||||||
| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) |
|
|
||||||
| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) |
|
|
||||||
| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. [Doc on admin only routes](./enterprise#control-available-public-private-routes) |
|
|
||||||
| use_azure_key_vault | boolean | If true, load keys from azure key vault |
|
|
||||||
| use_google_kms | boolean | If true, load keys from google kms |
|
|
||||||
| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) |
|
|
||||||
| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) |
|
|
||||||
| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) |
|
|
||||||
| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) |
|
|
||||||
| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
|
|
||||||
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
|
|
||||||
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
|
|
||||||
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
|
|
||||||
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
|
|
||||||
|
|
||||||
### router_settings - Reference
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
router_settings:
|
|
||||||
routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
|
|
||||||
redis_host: <your-redis-host> # string
|
|
||||||
redis_password: <your-redis-password> # string
|
|
||||||
redis_port: <your-redis-port> # string
|
|
||||||
enable_pre_call_check: true # bool - Before call is made check if a call is within model context window
|
|
||||||
allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
|
|
||||||
cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
|
|
||||||
disable_cooldowns: True # bool - Disable cooldowns for all models
|
|
||||||
enable_tag_filtering: True # bool - Use tag based routing for requests
|
|
||||||
retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
|
|
||||||
"AuthenticationErrorRetries": 3,
|
|
||||||
"TimeoutErrorRetries": 3,
|
|
||||||
"RateLimitErrorRetries": 3,
|
|
||||||
"ContentPolicyViolationErrorRetries": 4,
|
|
||||||
"InternalServerErrorRetries": 4
|
|
||||||
}
|
|
||||||
allowed_fails_policy: {
|
|
||||||
"BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
|
|
||||||
"AuthenticationErrorAllowedFails": 10, # int
|
|
||||||
"TimeoutErrorAllowedFails": 12, # int
|
|
||||||
"RateLimitErrorAllowedFails": 10000, # int
|
|
||||||
"ContentPolicyViolationErrorAllowedFails": 15, # int
|
|
||||||
"InternalServerErrorAllowedFails": 20, # int
|
|
||||||
}
|
|
||||||
content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
|
|
||||||
fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
|
|
||||||
```
|
|
||||||
|
|
||||||
| Name | Type | Description |
|
|
||||||
|------|------|-------------|
|
|
||||||
| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) |
|
|
||||||
| redis_host | string | The host address for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
|
|
||||||
| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** |
|
|
||||||
| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**|
|
|
||||||
| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
|
|
||||||
| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) |
|
|
||||||
| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) |
|
|
||||||
| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) |
|
|
||||||
| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. |
|
|
||||||
| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) |
|
|
||||||
| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) |
|
|
||||||
| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) |
|
|
||||||
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) |
|
|
||||||
|
|
||||||
|
|
||||||
### environment variables - Reference
|
|
||||||
|
|
||||||
| Name | Description |
|
|
||||||
|------|-------------|
|
|
||||||
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting an ID token in GitHub Actions
|
|
||||||
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions
|
|
||||||
| AISPEND_ACCOUNT_ID | Account ID for AI Spend
|
|
||||||
| AISPEND_API_KEY | API Key for AI Spend
|
|
||||||
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access
|
|
||||||
| ARIZE_API_KEY | API key for Arize platform integration
|
|
||||||
| ARIZE_SPACE_KEY | Space key for Arize platform
|
|
||||||
| ARGILLA_BATCH_SIZE | Batch size for Argilla logging
|
|
||||||
| ARGILLA_API_KEY | API key for Argilla platform
|
|
||||||
| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging
|
|
||||||
| ARGILLA_DATASET_NAME | Dataset name for Argilla logging
|
|
||||||
| ARGILLA_BASE_URL | Base URL for Argilla service
|
|
||||||
| ATHINA_API_KEY | API key for Athina service
|
|
||||||
| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key)
|
|
||||||
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services
|
|
||||||
| AWS_PROFILE_NAME | AWS CLI profile name to be used
|
|
||||||
| AWS_REGION_NAME | Default AWS region for service interactions
|
|
||||||
| AWS_ROLE_NAME | Role name for AWS IAM usage
|
|
||||||
| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services
|
|
||||||
| AWS_SESSION_NAME | Name for AWS session
|
|
||||||
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS
|
|
||||||
| AZURE_API_VERSION | Version of the Azure API being used
|
|
||||||
| AZURE_AUTHORITY_HOST | Azure authority host URL
|
|
||||||
| AZURE_CLIENT_ID | Client ID for Azure services
|
|
||||||
| AZURE_CLIENT_SECRET | Client secret for Azure services
|
|
||||||
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
|
|
||||||
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
|
|
||||||
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
|
|
||||||
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
|
|
||||||
| BRAINTRUST_API_KEY | API key for Braintrust integration
|
|
||||||
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
|
|
||||||
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI
|
|
||||||
| CONFIG_FILE_PATH | File path for configuration file
|
|
||||||
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache
|
|
||||||
| DATABASE_HOST | Hostname for the database server
|
|
||||||
| DATABASE_NAME | Name of the database
|
|
||||||
| DATABASE_PASSWORD | Password for the database user
|
|
||||||
| DATABASE_PORT | Port number for database connection
|
|
||||||
| DATABASE_SCHEMA | Schema name used in the database
|
|
||||||
| DATABASE_URL | Connection URL for the database
|
|
||||||
| DATABASE_USER | Username for database connection
|
|
||||||
| DATABASE_USERNAME | Alias for database user
|
|
||||||
| DATABRICKS_API_BASE | Base URL for Databricks API
|
|
||||||
| DD_BASE_URL | Base URL for Datadog integration
|
|
||||||
| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
|
|
||||||
| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration
|
|
||||||
| DD_API_KEY | API key for Datadog integration
|
|
||||||
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
|
|
||||||
| DD_SOURCE | Source identifier for Datadog logs
|
|
||||||
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
|
|
||||||
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
|
|
||||||
| DIRECT_URL | Direct URL for service endpoint
|
|
||||||
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
|
|
||||||
| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates
|
|
||||||
| DOCS_DESCRIPTION | Description text for documentation pages
|
|
||||||
| DOCS_FILTERED | Flag indicating filtered documentation
|
|
||||||
| DOCS_TITLE | Title of the documentation pages
|
|
||||||
| DOCS_URL | The path to the Swagger API documentation. **By default this is "/"**
|
|
||||||
| EMAIL_SUPPORT_CONTACT | Support contact email address
|
|
||||||
| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket
|
|
||||||
| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file
|
|
||||||
| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds**
|
|
||||||
| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. **Default is 2048**
|
|
||||||
| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers
|
|
||||||
| GENERIC_CLIENT_ID | Client ID for generic OAuth providers
|
|
||||||
| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers
|
|
||||||
| GENERIC_CLIENT_STATE | State parameter for generic client authentication
|
|
||||||
| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth
|
|
||||||
| GENERIC_SCOPE | Scope settings for generic OAuth providers
|
|
||||||
| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers
|
|
||||||
| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth
|
|
||||||
| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth
|
|
||||||
| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth
|
|
||||||
| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth
|
|
||||||
| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth
|
|
||||||
| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider
|
|
||||||
| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role
|
|
||||||
| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth
|
|
||||||
| GALILEO_BASE_URL | Base URL for Galileo platform
|
|
||||||
| GALILEO_PASSWORD | Password for Galileo authentication
|
|
||||||
| GALILEO_PROJECT_ID | Project ID for Galileo usage
|
|
||||||
| GALILEO_USERNAME | Username for Galileo authentication
|
|
||||||
| GREENSCALE_API_KEY | API key for Greenscale service
|
|
||||||
| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service
|
|
||||||
| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file
|
|
||||||
| GOOGLE_CLIENT_ID | Client ID for Google OAuth
|
|
||||||
| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth
|
|
||||||
| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS
|
|
||||||
| HF_API_BASE | Base URL for Hugging Face API
|
|
||||||
| HELICONE_API_KEY | API key for Helicone service
|
|
||||||
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
|
|
||||||
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
|
|
||||||
| JSON_LOGS | Enable JSON formatted logging
|
|
||||||
| JWT_AUDIENCE | Expected audience for JWT tokens
|
|
||||||
| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification
|
|
||||||
| LAGO_API_BASE | Base URL for Lago API
|
|
||||||
| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago
|
|
||||||
| LAGO_API_EVENT_CODE | Event code for Lago API events
|
|
||||||
| LAGO_API_KEY | API key for accessing Lago services
|
|
||||||
| LANGFUSE_DEBUG | Toggle debug mode for Langfuse
|
|
||||||
| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs
|
|
||||||
| LANGFUSE_HOST | Host URL for Langfuse service
|
|
||||||
| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication
|
|
||||||
| LANGFUSE_RELEASE | Release version of Langfuse integration
|
|
||||||
| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication
|
|
||||||
| LANGSMITH_API_KEY | API key for Langsmith platform
|
|
||||||
| LANGSMITH_BASE_URL | Base URL for Langsmith service
|
|
||||||
| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith
|
|
||||||
| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run
|
|
||||||
| LANGSMITH_PROJECT | Project name for Langsmith integration
|
|
||||||
| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging
|
|
||||||
| LANGTRACE_API_KEY | API key for Langtrace service
|
|
||||||
| LITERAL_API_KEY | API key for Literal integration
|
|
||||||
| LITERAL_API_URL | API URL for Literal service
|
|
||||||
| LITERAL_BATCH_SIZE | Batch size for Literal operations
|
|
||||||
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
|
|
||||||
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
|
|
||||||
| LITELLM_EMAIL | Email associated with LiteLLM account
|
|
||||||
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
|
|
||||||
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
|
|
||||||
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
|
|
||||||
| LITELLM_LICENSE | License key for LiteLLM usage
|
|
||||||
| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM
|
|
||||||
| LITELLM_LOG | Enable detailed logging for LiteLLM
|
|
||||||
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development)
|
|
||||||
| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM
|
|
||||||
| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM
|
|
||||||
| LITELLM_TOKEN | Access token for LiteLLM integration
|
|
||||||
| LOGFIRE_TOKEN | Token for Logfire logging service
|
|
||||||
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
|
|
||||||
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
|
|
||||||
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
|
|
||||||
| NO_DOCS | Flag to disable documentation generation
|
|
||||||
| NO_PROXY | List of addresses to bypass proxy
|
|
||||||
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
|
|
||||||
| OPENAI_API_BASE | Base URL for OpenAI API
|
|
||||||
| OPENAI_API_KEY | API key for OpenAI services
|
|
||||||
| OPENAI_ORGANIZATION | Organization identifier for OpenAI
|
|
||||||
| OPENID_BASE_URL | Base URL for OpenID Connect services
|
|
||||||
| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication
|
|
||||||
| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication
|
|
||||||
| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration
|
|
||||||
| OPENMETER_API_KEY | API key for OpenMeter services
|
|
||||||
| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter
|
|
||||||
| OTEL_ENDPOINT | OpenTelemetry endpoint for traces
|
|
||||||
| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry
|
|
||||||
| OTEL_EXPORTER | Exporter type for OpenTelemetry
|
|
||||||
| OTEL_HEADERS | Headers for OpenTelemetry requests
|
|
||||||
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
|
|
||||||
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
|
|
||||||
| PREDIBASE_API_BASE | Base URL for Predibase API
|
|
||||||
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
|
|
||||||
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
|
|
||||||
| PROMETHEUS_URL | URL for Prometheus service
|
|
||||||
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
|
|
||||||
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
|
||||||
| PROXY_BASE_URL | Base URL for proxy service
|
|
||||||
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
|
||||||
| PROXY_MASTER_KEY | Master key for proxy authentication
|
|
||||||
| QDRANT_API_BASE | Base URL for Qdrant API
|
|
||||||
| QDRANT_API_KEY | API key for Qdrant service
|
|
||||||
| QDRANT_URL | Connection URL for Qdrant database
|
|
||||||
| REDIS_HOST | Hostname for Redis server
|
|
||||||
| REDIS_PASSWORD | Password for Redis service
|
|
||||||
| REDIS_PORT | Port number for Redis server
|
|
||||||
| REDOC_URL | The path to the Redoc Fast API documentation. **By default this is "/redoc"**
|
|
||||||
| SERVER_ROOT_PATH | Root path for the server application
|
|
||||||
| SET_VERBOSE | Flag to enable verbose logging
|
|
||||||
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
|
|
||||||
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
|
|
||||||
| SMTP_HOST | Hostname for the SMTP server
|
|
||||||
| SMTP_PASSWORD | Password for SMTP authentication
|
|
||||||
| SMTP_PORT | Port number for SMTP server
|
|
||||||
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
|
|
||||||
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
|
|
||||||
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
|
|
||||||
| SMTP_USERNAME | Username for SMTP authentication
|
|
||||||
| SPEND_LOGS_URL | URL for retrieving spend logs
|
|
||||||
| SSL_CERTIFICATE | Path to the SSL certificate file
|
|
||||||
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
|
|
||||||
| SUPABASE_KEY | API key for Supabase service
|
|
||||||
| SUPABASE_URL | Base URL for Supabase instance
|
|
||||||
| TEST_EMAIL_ADDRESS | Email address used for testing purposes
|
|
||||||
| UI_LOGO_PATH | Path to the logo image used in the UI
|
|
||||||
| UI_PASSWORD | Password for accessing the UI
|
|
||||||
| UI_USERNAME | Username for accessing the UI
|
|
||||||
| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse
|
|
||||||
| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service
|
|
||||||
| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication
|
|
||||||
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
|
||||||
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
|
||||||
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
|
||||||
| WEBHOOK_URL | URL for receiving webhooks from external services
|
|
||||||
## Extras
|
## Extras
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -16,25 +16,27 @@ model_list:
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
redis_host: <your-redis-host>
|
|
||||||
redis_password: <your-redis-password>
|
|
||||||
redis_port: <your-redis-port>
|
|
||||||
provider_budget_config:
|
provider_budget_config:
|
||||||
openai:
|
openai:
|
||||||
budget_limit: 0.000000000001 # float of $ value budget for time period
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
time_period: 1d # can be 1d, 2d, 30d
|
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
|
||||||
azure:
|
azure:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 1d
|
time_period: 1d
|
||||||
anthropic:
|
anthropic:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 10d
|
time_period: 10d
|
||||||
vertexai:
|
vertex_ai:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 12d
|
time_period: 12d
|
||||||
gemini:
|
gemini:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 12d
|
time_period: 12d
|
||||||
|
|
||||||
|
# OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
|
@ -112,8 +114,11 @@ Expected response on failure
|
||||||
- If all providers exceed budget, raises an error
|
- If all providers exceed budget, raises an error
|
||||||
|
|
||||||
3. **Supported Time Periods**:
|
3. **Supported Time Periods**:
|
||||||
- Format: "Xd" where X is number of days
|
- Seconds: "Xs" (e.g., "30s")
|
||||||
- Examples: "1d" (1 day), "30d" (30 days)
|
- Minutes: "Xm" (e.g., "10m")
|
||||||
|
- Hours: "Xh" (e.g., "24h")
|
||||||
|
- Days: "Xd" (e.g., "1d", "30d")
|
||||||
|
- Months: "Xmo" (e.g., "1mo", "2mo")
|
||||||
|
|
||||||
4. **Requirements**:
|
4. **Requirements**:
|
||||||
- Redis required for tracking spend across instances
|
- Redis required for tracking spend across instances
|
||||||
|
@ -129,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
|
||||||
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Multi-instance setup
|
||||||
|
|
||||||
|
If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-3.5-turbo
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-3.5-turbo
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
|
router_settings:
|
||||||
|
provider_budget_config:
|
||||||
|
openai:
|
||||||
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
|
||||||
|
|
||||||
|
# 👇 Add this: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
## Spec for provider_budget_config
|
## Spec for provider_budget_config
|
||||||
|
|
||||||
|
@ -136,7 +166,12 @@ The `provider_budget_config` is a dictionary where:
|
||||||
- **Key**: Provider name (string) - Must be a valid [LiteLLM provider name](https://docs.litellm.ai/docs/providers)
|
- **Key**: Provider name (string) - Must be a valid [LiteLLM provider name](https://docs.litellm.ai/docs/providers)
|
||||||
- **Value**: Budget configuration object with the following parameters:
|
- **Value**: Budget configuration object with the following parameters:
|
||||||
- `budget_limit`: Float value representing the budget in USD
|
- `budget_limit`: Float value representing the budget in USD
|
||||||
- `time_period`: String in the format "Xd" where X is the number of days (e.g., "1d", "30d")
|
- `time_period`: Duration string in one of the following formats:
|
||||||
|
- Seconds: `"Xs"` (e.g., "30s")
|
||||||
|
- Minutes: `"Xm"` (e.g., "10m")
|
||||||
|
- Hours: `"Xh"` (e.g., "24h")
|
||||||
|
- Days: `"Xd"` (e.g., "1d", "30d")
|
||||||
|
- Months: `"Xmo"` (e.g., "1mo", "2mo")
|
||||||
|
|
||||||
Example structure:
|
Example structure:
|
||||||
```yaml
|
```yaml
|
||||||
|
@ -147,4 +182,10 @@ provider_budget_config:
|
||||||
azure:
|
azure:
|
||||||
budget_limit: 500.0 # $500 USD
|
budget_limit: 500.0 # $500 USD
|
||||||
time_period: "30d" # 30 day period
|
time_period: "30d" # 30 day period
|
||||||
|
anthropic:
|
||||||
|
budget_limit: 200.0 # $200 USD
|
||||||
|
time_period: "1mo" # 1 month period
|
||||||
|
gemini:
|
||||||
|
budget_limit: 50.0 # $50 USD
|
||||||
|
time_period: "24h" # 24 hour period
|
||||||
```
|
```
|
24
docs/my-website/docs/router_architecture.md
Normal file
24
docs/my-website/docs/router_architecture.md
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# Router Architecture (Fallbacks / Retries)
|
||||||
|
|
||||||
|
## High Level architecture
|
||||||
|
|
||||||
|
<Image img={require('../img/router_architecture.png')} style={{ width: '100%', maxWidth: '4000px' }} />
|
||||||
|
|
||||||
|
### Request Flow
|
||||||
|
|
||||||
|
1. **User Sends Request**: The process begins when a user sends a request to the LiteLLM Router endpoint. All unified endpoints (`.completion`, `.embeddings`, etc) are supported by LiteLLM Router.
|
||||||
|
|
||||||
|
2. **function_with_fallbacks**: The initial request is sent to the `function_with_fallbacks` function. This function wraps the initial request in a try-except block, to handle any exceptions - doing fallbacks if needed. This request is then sent to the `function_with_retries` function.
|
||||||
|
|
||||||
|
|
||||||
|
3. **function_with_retries**: The `function_with_retries` function wraps the request in a try-except block and passes the initial request to a base litellm unified function (`litellm.completion`, `litellm.embeddings`, etc) to handle LLM API calling. `function_with_retries` handles any exceptions - doing retries on the model group if needed (i.e. if the request fails, it will retry on an available model within the model group).
|
||||||
|
|
||||||
|
4. **litellm.completion**: The `litellm.completion` function is a base function that handles the LLM API calling. It is used by `function_with_retries` to make the actual request to the LLM API.
|
||||||
|
|
||||||
|
## Legend
|
||||||
|
|
||||||
|
**model_group**: A group of LLM API deployments that share the same `model_name`, are part of the same `model_group`, and can be load balanced across.
|
BIN
docs/my-website/img/router_architecture.png
Normal file
BIN
docs/my-website/img/router_architecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 59 KiB |
|
@ -29,13 +29,17 @@ const sidebars = {
|
||||||
},
|
},
|
||||||
items: [
|
items: [
|
||||||
"proxy/docker_quick_start",
|
"proxy/docker_quick_start",
|
||||||
|
{
|
||||||
|
"type": "category",
|
||||||
|
"label": "Config.yaml",
|
||||||
|
"items": ["proxy/configs", "proxy/config_settings"]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Setup & Deployment",
|
label: "Setup & Deployment",
|
||||||
items: [
|
items: [
|
||||||
"proxy/deploy",
|
"proxy/deploy",
|
||||||
"proxy/prod",
|
"proxy/prod",
|
||||||
"proxy/configs",
|
|
||||||
"proxy/cli",
|
"proxy/cli",
|
||||||
"proxy/model_management",
|
"proxy/model_management",
|
||||||
"proxy/health",
|
"proxy/health",
|
||||||
|
@ -47,7 +51,7 @@ const sidebars = {
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Architecture",
|
label: "Architecture",
|
||||||
items: ["proxy/architecture", "proxy/db_info"],
|
items: ["proxy/architecture", "proxy/db_info", "router_architecture"],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
type: "link",
|
type: "link",
|
||||||
|
@ -266,10 +270,10 @@ const sidebars = {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Load Balancing & Routing",
|
label: "Routing, Loadbalancing & Fallbacks",
|
||||||
link: {
|
link: {
|
||||||
type: "generated-index",
|
type: "generated-index",
|
||||||
title: "Load Balancing & Routing",
|
title: "Routing, Loadbalancing & Fallbacks",
|
||||||
description: "Learn how to load balance, route, and set fallbacks for your LLM requests",
|
description: "Learn how to load balance, route, and set fallbacks for your LLM requests",
|
||||||
slug: "/routing-load-balancing",
|
slug: "/routing-load-balancing",
|
||||||
},
|
},
|
||||||
|
|
|
@ -313,12 +313,13 @@ def get_redis_async_client(**env_overrides) -> async_redis.Redis:
|
||||||
|
|
||||||
def get_redis_connection_pool(**env_overrides):
|
def get_redis_connection_pool(**env_overrides):
|
||||||
redis_kwargs = _get_redis_client_logic(**env_overrides)
|
redis_kwargs = _get_redis_client_logic(**env_overrides)
|
||||||
|
verbose_logger.debug("get_redis_connection_pool: redis_kwargs", redis_kwargs)
|
||||||
if "url" in redis_kwargs and redis_kwargs["url"] is not None:
|
if "url" in redis_kwargs and redis_kwargs["url"] is not None:
|
||||||
return async_redis.BlockingConnectionPool.from_url(
|
return async_redis.BlockingConnectionPool.from_url(
|
||||||
timeout=5, url=redis_kwargs["url"]
|
timeout=5, url=redis_kwargs["url"]
|
||||||
)
|
)
|
||||||
connection_class = async_redis.Connection
|
connection_class = async_redis.Connection
|
||||||
if "ssl" in redis_kwargs and redis_kwargs["ssl"] is not None:
|
if "ssl" in redis_kwargs:
|
||||||
connection_class = async_redis.SSLConnection
|
connection_class = async_redis.SSLConnection
|
||||||
redis_kwargs.pop("ssl", None)
|
redis_kwargs.pop("ssl", None)
|
||||||
redis_kwargs["connection_class"] = connection_class
|
redis_kwargs["connection_class"] = connection_class
|
||||||
|
|
|
@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Tuple
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import print_verbose, verbose_logger
|
from litellm._logging import print_verbose, verbose_logger
|
||||||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||||
|
from litellm.types.caching import RedisPipelineIncrementOperation
|
||||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
||||||
from litellm.types.utils import all_litellm_params
|
from litellm.types.utils import all_litellm_params
|
||||||
|
|
||||||
|
@ -890,3 +891,92 @@ class RedisCache(BaseCache):
|
||||||
|
|
||||||
def delete_cache(self, key):
|
def delete_cache(self, key):
|
||||||
self.redis_client.delete(key)
|
self.redis_client.delete(key)
|
||||||
|
|
||||||
|
async def _pipeline_increment_helper(
|
||||||
|
self,
|
||||||
|
pipe: pipeline,
|
||||||
|
increment_list: List[RedisPipelineIncrementOperation],
|
||||||
|
) -> Optional[List[float]]:
|
||||||
|
"""Helper function for pipeline increment operations"""
|
||||||
|
# Iterate through each increment operation and add commands to pipeline
|
||||||
|
for increment_op in increment_list:
|
||||||
|
cache_key = self.check_and_fix_namespace(key=increment_op["key"])
|
||||||
|
print_verbose(
|
||||||
|
f"Increment ASYNC Redis Cache PIPELINE: key: {cache_key}\nValue {increment_op['increment_value']}\nttl={increment_op['ttl']}"
|
||||||
|
)
|
||||||
|
pipe.incrbyfloat(cache_key, increment_op["increment_value"])
|
||||||
|
if increment_op["ttl"] is not None:
|
||||||
|
_td = timedelta(seconds=increment_op["ttl"])
|
||||||
|
pipe.expire(cache_key, _td)
|
||||||
|
# Execute the pipeline and return results
|
||||||
|
results = await pipe.execute()
|
||||||
|
print_verbose(f"Increment ASYNC Redis Cache PIPELINE: results: {results}")
|
||||||
|
return results
|
||||||
|
|
||||||
|
async def async_increment_pipeline(
|
||||||
|
self, increment_list: List[RedisPipelineIncrementOperation], **kwargs
|
||||||
|
) -> Optional[List[float]]:
|
||||||
|
"""
|
||||||
|
Use Redis Pipelines for bulk increment operations
|
||||||
|
Args:
|
||||||
|
increment_list: List of RedisPipelineIncrementOperation dicts containing:
|
||||||
|
- key: str
|
||||||
|
- increment_value: float
|
||||||
|
- ttl_seconds: int
|
||||||
|
"""
|
||||||
|
# don't waste a network request if there's nothing to increment
|
||||||
|
if len(increment_list) == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
from redis.asyncio import Redis
|
||||||
|
|
||||||
|
_redis_client: Redis = self.init_async_client() # type: ignore
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
print_verbose(
|
||||||
|
f"Increment Async Redis Cache Pipeline: increment list: {increment_list}"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with _redis_client as redis_client:
|
||||||
|
async with redis_client.pipeline(transaction=True) as pipe:
|
||||||
|
results = await self._pipeline_increment_helper(
|
||||||
|
pipe, increment_list
|
||||||
|
)
|
||||||
|
|
||||||
|
print_verbose(f"pipeline increment results: {results}")
|
||||||
|
|
||||||
|
## LOGGING ##
|
||||||
|
end_time = time.time()
|
||||||
|
_duration = end_time - start_time
|
||||||
|
asyncio.create_task(
|
||||||
|
self.service_logger_obj.async_service_success_hook(
|
||||||
|
service=ServiceTypes.REDIS,
|
||||||
|
duration=_duration,
|
||||||
|
call_type="async_increment_pipeline",
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
except Exception as e:
|
||||||
|
## LOGGING ##
|
||||||
|
end_time = time.time()
|
||||||
|
_duration = end_time - start_time
|
||||||
|
asyncio.create_task(
|
||||||
|
self.service_logger_obj.async_service_failure_hook(
|
||||||
|
service=ServiceTypes.REDIS,
|
||||||
|
duration=_duration,
|
||||||
|
error=e,
|
||||||
|
call_type="async_increment_pipeline",
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
verbose_logger.error(
|
||||||
|
"LiteLLM Redis Caching: async increment_pipeline() - Got exception from REDIS %s",
|
||||||
|
str(e),
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
|
@ -8,4 +8,5 @@ Core files:
|
||||||
- `exception_mapping_utils.py`: utils for mapping exceptions to openai-compatible error types.
|
- `exception_mapping_utils.py`: utils for mapping exceptions to openai-compatible error types.
|
||||||
- `default_encoding.py`: code for loading the default encoding (tiktoken)
|
- `default_encoding.py`: code for loading the default encoding (tiktoken)
|
||||||
- `get_llm_provider_logic.py`: code for inferring the LLM provider from a given model name.
|
- `get_llm_provider_logic.py`: code for inferring the LLM provider from a given model name.
|
||||||
|
- `duration_parser.py`: code for parsing durations - e.g. "1d", "1mo", "10s"
|
||||||
|
|
||||||
|
|
92
litellm/litellm_core_utils/duration_parser.py
Normal file
92
litellm/litellm_core_utils/duration_parser.py
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
"""
|
||||||
|
Helper utilities for parsing durations - 1s, 1d, 10d, 30d, 1mo, 2mo
|
||||||
|
|
||||||
|
duration_in_seconds is used in diff parts of the code base, example
|
||||||
|
- Router - Provider budget routing
|
||||||
|
- Proxy - Key, Team Generation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_from_regex(duration: str) -> Tuple[int, str]:
|
||||||
|
match = re.match(r"(\d+)(mo|[smhd]?)", duration)
|
||||||
|
|
||||||
|
if not match:
|
||||||
|
raise ValueError("Invalid duration format")
|
||||||
|
|
||||||
|
value, unit = match.groups()
|
||||||
|
value = int(value)
|
||||||
|
|
||||||
|
return value, unit
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_day_of_month(year, month):
|
||||||
|
# Handle December case
|
||||||
|
if month == 12:
|
||||||
|
return 31
|
||||||
|
# Next month is January, so subtract a day from March 1st
|
||||||
|
next_month = datetime(year=year, month=month + 1, day=1)
|
||||||
|
last_day_of_month = (next_month - timedelta(days=1)).day
|
||||||
|
return last_day_of_month
|
||||||
|
|
||||||
|
|
||||||
|
def duration_in_seconds(duration: str) -> int:
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- duration:
|
||||||
|
- "<number>s" - seconds
|
||||||
|
- "<number>m" - minutes
|
||||||
|
- "<number>h" - hours
|
||||||
|
- "<number>d" - days
|
||||||
|
- "<number>mo" - months
|
||||||
|
|
||||||
|
Returns time in seconds till when budget needs to be reset
|
||||||
|
"""
|
||||||
|
value, unit = _extract_from_regex(duration=duration)
|
||||||
|
|
||||||
|
if unit == "s":
|
||||||
|
return value
|
||||||
|
elif unit == "m":
|
||||||
|
return value * 60
|
||||||
|
elif unit == "h":
|
||||||
|
return value * 3600
|
||||||
|
elif unit == "d":
|
||||||
|
return value * 86400
|
||||||
|
elif unit == "mo":
|
||||||
|
now = time.time()
|
||||||
|
current_time = datetime.fromtimestamp(now)
|
||||||
|
|
||||||
|
if current_time.month == 12:
|
||||||
|
target_year = current_time.year + 1
|
||||||
|
target_month = 1
|
||||||
|
else:
|
||||||
|
target_year = current_time.year
|
||||||
|
target_month = current_time.month + value
|
||||||
|
|
||||||
|
# Determine the day to set for next month
|
||||||
|
target_day = current_time.day
|
||||||
|
last_day_of_target_month = get_last_day_of_month(target_year, target_month)
|
||||||
|
|
||||||
|
if target_day > last_day_of_target_month:
|
||||||
|
target_day = last_day_of_target_month
|
||||||
|
|
||||||
|
next_month = datetime(
|
||||||
|
year=target_year,
|
||||||
|
month=target_month,
|
||||||
|
day=target_day,
|
||||||
|
hour=current_time.hour,
|
||||||
|
minute=current_time.minute,
|
||||||
|
second=current_time.second,
|
||||||
|
microsecond=current_time.microsecond,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate the duration until the first day of the next month
|
||||||
|
duration_until_next_month = next_month - current_time
|
||||||
|
return int(duration_until_next_month.total_seconds())
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported duration unit, passed duration: {duration}")
|
1
litellm/proxy/_experimental/out/404.html
Normal file
1
litellm/proxy/_experimental/out/404.html
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
|
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(e,n,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(e){e.exports={style:{fontFamily:"'__Inter_86ef86', '__Inter_Fallback_86ef86'",fontStyle:"normal"},className:"__className_86ef86"}}},function(e){e.O(0,[971,69,744],function(){return e(e.s=87421)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(47907),i=t(2179),r=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),f=t(777),p=t(37963),j=t(60620),_=t(13565);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. 
accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,902,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(47907),i=t(2179),r=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),f=t(777),p=t(37963),j=t(60620),_=t(13565);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. 
accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,902,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
||||||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/ea3759ed931c00b2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var 
l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/00256a1984d35914.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var 
l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
|
@ -1,4 +1,4 @@
|
||||||
@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/ec159349637c90ad-s.woff2) format("woff2");unicode-range:u+0460-052f,u+1c80-1c88,u+20b4,u+2de0-2dff,u+a640-a69f,u+fe2e-fe2f}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/513657b02c5c193f-s.woff2) format("woff2");unicode-range:u+0301,u+0400-045f,u+0490-0491,u+04b0-04b1,u+2116}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/fd4db3eb5472fc27-s.woff2) format("woff2");unicode-range:u+1f??}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/51ed15f9841b9f9d-s.woff2) format("woff2");unicode-range:u+0370-0377,u+037a-037f,u+0384-038a,u+038c,u+038e-03a1,u+03a3-03ff}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/05a31a2ca4975f99-s.woff2) format("woff2");unicode-range:u+0102-0103,u+0110-0111,u+0128-0129,u+0168-0169,u+01a0-01a1,u+01af-01b0,u+0300-0301,u+0303-0304,u+0308-0309,u+0323,u+0329,u+1ea0-1ef9,u+20ab}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/d6b16ce4a6175f26-s.woff2) format("woff2");unicode-range:u+0100-02af,u+0304,u+0308,u+0329,u+1e00-1e9f,u+1ef2-1eff,u+2020,u+20a0-20ab,u+20ad-20c0,u+2113,u+2c60-2c7f,u+a720-a7ff}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2) 
format("woff2");unicode-range:u+00??,u+0131,u+0152-0153,u+02bb-02bc,u+02c6,u+02da,u+02dc,u+0304,u+0308,u+0329,u+2000-206f,u+2074,u+20ac,u+2122,u+2191,u+2193,u+2212,u+2215,u+feff,u+fffd}@font-face{font-family:__Inter_Fallback_12bbc4;src:local("Arial");ascent-override:90.20%;descent-override:22.48%;line-gap-override:0.00%;size-adjust:107.40%}.__className_12bbc4{font-family:__Inter_12bbc4,__Inter_Fallback_12bbc4;font-style:normal}
|
@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/55c55f0601d81cf3-s.woff2) format("woff2");unicode-range:u+0460-052f,u+1c80-1c88,u+20b4,u+2de0-2dff,u+a640-a69f,u+fe2e-fe2f}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/26a46d62cd723877-s.woff2) format("woff2");unicode-range:u+0301,u+0400-045f,u+0490-0491,u+04b0-04b1,u+2116}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/97e0cb1ae144a2a9-s.woff2) format("woff2");unicode-range:u+1f??}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/581909926a08bbc8-s.woff2) format("woff2");unicode-range:u+0370-0377,u+037a-037f,u+0384-038a,u+038c,u+038e-03a1,u+03a3-03ff}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/df0a9ae256c0569c-s.woff2) format("woff2");unicode-range:u+0102-0103,u+0110-0111,u+0128-0129,u+0168-0169,u+01a0-01a1,u+01af-01b0,u+0300-0301,u+0303-0304,u+0308-0309,u+0323,u+0329,u+1ea0-1ef9,u+20ab}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/6d93bde91c0c2823-s.woff2) format("woff2");unicode-range:u+0100-02af,u+0304,u+0308,u+0329,u+1e00-1e9f,u+1ef2-1eff,u+2020,u+20a0-20ab,u+20ad-20c0,u+2113,u+2c60-2c7f,u+a720-a7ff}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2) 
format("woff2");unicode-range:u+00??,u+0131,u+0152-0153,u+02bb-02bc,u+02c6,u+02da,u+02dc,u+0304,u+0308,u+0329,u+2000-206f,u+2074,u+20ac,u+2122,u+2191,u+2193,u+2212,u+2215,u+feff,u+fffd}@font-face{font-family:__Inter_Fallback_86ef86;src:local("Arial");ascent-override:90.20%;descent-override:22.48%;line-gap-override:0.00%;size-adjust:107.40%}.__className_86ef86{font-family:__Inter_86ef86,__Inter_Fallback_86ef86;font-style:normal}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
! tailwindcss v3.4.1 | MIT License | https://tailwindcss.com
|
! tailwindcss v3.4.1 | MIT License | https://tailwindcss.com
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/ea3759ed931c00b2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-3d2257b0ff5aadb2.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-fc3969bfc35ead00.js\",\"777\",\"static/chunks/777-9d9df0b75010dbf9.js\",\"931\",\"static/chunks/app/page-bd2e157c2bc2f150.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_ne
xt/static/css/ea3759ed931c00b2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"e-Zsp_y3gSAoiJHmJByXA\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 
0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/00256a1984d35914.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-3d2257b0ff5aadb2.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-4e8df4039ecf4386.js\",\"777\",\"static/chunks/777-9d9df0b75010dbf9.js\",\"931\",\"static/chunks/app/page-68b04cd7217f38ce.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_ne
xt/static/css/00256a1984d35914.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"WeMIGILYzOYN-R9DXbvCD\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_86ef86\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 
0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-fc3969bfc35ead00.js","777","static/chunks/777-9d9df0b75010dbf9.js","931","static/chunks/app/page-bd2e157c2bc2f150.js"],""]
|
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-4e8df4039ecf4386.js","777","static/chunks/777-9d9df0b75010dbf9.js","931","static/chunks/app/page-68b04cd7217f38ce.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["e-Zsp_y3gSAoiJHmJByXA",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[87494,["902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","777","static/chunks/777-9d9df0b75010dbf9.js","418","static/chunks/app/model_hub/page-748a83a8e772a56b.js"],""]
|
3:I[87494,["902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","777","static/chunks/777-9d9df0b75010dbf9.js","418","static/chunks/app/model_hub/page-104cada6b5e5b14c.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["e-Zsp_y3gSAoiJHmJByXA",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-58bf23027703b2e8.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-9d9df0b75010dbf9.js","461","static/chunks/app/onboarding/page-884a15d08f8be397.js"],""]
|
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-58bf23027703b2e8.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-9d9df0b75010dbf9.js","461","static/chunks/app/onboarding/page-bad6cfbe58b9d19c.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["e-Zsp_y3gSAoiJHmJByXA",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -2110,6 +2110,7 @@ class SpecialHeaders(enum.Enum):
|
||||||
openai_authorization = "Authorization"
|
openai_authorization = "Authorization"
|
||||||
azure_authorization = "API-Key"
|
azure_authorization = "API-Key"
|
||||||
anthropic_authorization = "x-api-key"
|
anthropic_authorization = "x-api-key"
|
||||||
|
google_ai_studio_authorization = "x-goog-api-key"
|
||||||
|
|
||||||
|
|
||||||
class LitellmDataForBackendLLMCall(TypedDict, total=False):
|
class LitellmDataForBackendLLMCall(TypedDict, total=False):
|
||||||
|
|
|
@ -95,6 +95,11 @@ anthropic_api_key_header = APIKeyHeader(
|
||||||
auto_error=False,
|
auto_error=False,
|
||||||
description="If anthropic client used.",
|
description="If anthropic client used.",
|
||||||
)
|
)
|
||||||
|
google_ai_studio_api_key_header = APIKeyHeader(
|
||||||
|
name=SpecialHeaders.google_ai_studio_authorization.value,
|
||||||
|
auto_error=False,
|
||||||
|
description="If google ai studio client used.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _get_bearer_token(
|
def _get_bearer_token(
|
||||||
|
@ -197,6 +202,9 @@ async def user_api_key_auth( # noqa: PLR0915
|
||||||
anthropic_api_key_header: Optional[str] = fastapi.Security(
|
anthropic_api_key_header: Optional[str] = fastapi.Security(
|
||||||
anthropic_api_key_header
|
anthropic_api_key_header
|
||||||
),
|
),
|
||||||
|
google_ai_studio_api_key_header: Optional[str] = fastapi.Security(
|
||||||
|
google_ai_studio_api_key_header
|
||||||
|
),
|
||||||
) -> UserAPIKeyAuth:
|
) -> UserAPIKeyAuth:
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
general_settings,
|
general_settings,
|
||||||
|
@ -233,6 +241,8 @@ async def user_api_key_auth( # noqa: PLR0915
|
||||||
api_key = azure_api_key_header
|
api_key = azure_api_key_header
|
||||||
elif isinstance(anthropic_api_key_header, str):
|
elif isinstance(anthropic_api_key_header, str):
|
||||||
api_key = anthropic_api_key_header
|
api_key = anthropic_api_key_header
|
||||||
|
elif isinstance(google_ai_studio_api_key_header, str):
|
||||||
|
api_key = google_ai_studio_api_key_header
|
||||||
elif pass_through_endpoints is not None:
|
elif pass_through_endpoints is not None:
|
||||||
for endpoint in pass_through_endpoints:
|
for endpoint in pass_through_endpoints:
|
||||||
if endpoint.get("path", "") == route:
|
if endpoint.get("path", "") == route:
|
||||||
|
|
|
@ -30,7 +30,7 @@ from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.proxy._types import *
|
from litellm.proxy._types import *
|
||||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||||
_duration_in_seconds,
|
duration_in_seconds,
|
||||||
generate_key_helper_fn,
|
generate_key_helper_fn,
|
||||||
)
|
)
|
||||||
from litellm.proxy.management_helpers.utils import (
|
from litellm.proxy.management_helpers.utils import (
|
||||||
|
@ -516,7 +516,7 @@ async def user_update(
|
||||||
is_internal_user = True
|
is_internal_user = True
|
||||||
|
|
||||||
if "budget_duration" in non_default_values:
|
if "budget_duration" in non_default_values:
|
||||||
duration_s = _duration_in_seconds(
|
duration_s = duration_in_seconds(
|
||||||
duration=non_default_values["budget_duration"]
|
duration=non_default_values["budget_duration"]
|
||||||
)
|
)
|
||||||
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
|
@ -535,7 +535,7 @@ async def user_update(
|
||||||
non_default_values["budget_duration"] = (
|
non_default_values["budget_duration"] = (
|
||||||
litellm.internal_user_budget_duration
|
litellm.internal_user_budget_duration
|
||||||
)
|
)
|
||||||
duration_s = _duration_in_seconds(
|
duration_s = duration_in_seconds(
|
||||||
duration=non_default_values["budget_duration"]
|
duration=non_default_values["budget_duration"]
|
||||||
)
|
)
|
||||||
user_reset_at = datetime.now(timezone.utc) + timedelta(
|
user_reset_at = datetime.now(timezone.utc) + timedelta(
|
||||||
|
@ -725,8 +725,8 @@ async def delete_user(
|
||||||
- user_ids: List[str] - The list of user id's to be deleted.
|
- user_ids: List[str] - The list of user id's to be deleted.
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
user_api_key_cache,
|
user_api_key_cache,
|
||||||
|
|
|
@ -35,8 +35,8 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
from litellm.proxy.hooks.key_management_event_hooks import KeyManagementEventHooks
|
from litellm.proxy.hooks.key_management_event_hooks import KeyManagementEventHooks
|
||||||
from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
|
from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
|
||||||
from litellm.proxy.utils import (
|
from litellm.proxy.utils import (
|
||||||
_duration_in_seconds,
|
|
||||||
_hash_token_if_needed,
|
_hash_token_if_needed,
|
||||||
|
duration_in_seconds,
|
||||||
handle_exception_on_proxy,
|
handle_exception_on_proxy,
|
||||||
)
|
)
|
||||||
from litellm.secret_managers.main import get_secret
|
from litellm.secret_managers.main import get_secret
|
||||||
|
@ -362,10 +362,10 @@ async def generate_key_fn( # noqa: PLR0915
|
||||||
)
|
)
|
||||||
# Compare durations
|
# Compare durations
|
||||||
elif key in ["budget_duration", "duration"]:
|
elif key in ["budget_duration", "duration"]:
|
||||||
upperbound_duration = _duration_in_seconds(
|
upperbound_duration = duration_in_seconds(
|
||||||
duration=upperbound_value
|
duration=upperbound_value
|
||||||
)
|
)
|
||||||
user_duration = _duration_in_seconds(duration=value)
|
user_duration = duration_in_seconds(duration=value)
|
||||||
if user_duration > upperbound_duration:
|
if user_duration > upperbound_duration:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
|
@ -462,7 +462,7 @@ def prepare_key_update_data(
|
||||||
if "duration" in non_default_values:
|
if "duration" in non_default_values:
|
||||||
duration = non_default_values.pop("duration")
|
duration = non_default_values.pop("duration")
|
||||||
if duration and (isinstance(duration, str)) and len(duration) > 0:
|
if duration and (isinstance(duration, str)) and len(duration) > 0:
|
||||||
duration_s = _duration_in_seconds(duration=duration)
|
duration_s = duration_in_seconds(duration=duration)
|
||||||
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
non_default_values["expires"] = expires
|
non_default_values["expires"] = expires
|
||||||
|
|
||||||
|
@ -473,7 +473,7 @@ def prepare_key_update_data(
|
||||||
and (isinstance(budget_duration, str))
|
and (isinstance(budget_duration, str))
|
||||||
and len(budget_duration) > 0
|
and len(budget_duration) > 0
|
||||||
):
|
):
|
||||||
duration_s = _duration_in_seconds(duration=budget_duration)
|
duration_s = duration_in_seconds(duration=budget_duration)
|
||||||
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
non_default_values["budget_reset_at"] = key_reset_at
|
non_default_values["budget_reset_at"] = key_reset_at
|
||||||
|
|
||||||
|
@ -973,19 +973,19 @@ async def generate_key_helper_fn( # noqa: PLR0915
|
||||||
if duration is None: # allow tokens that never expire
|
if duration is None: # allow tokens that never expire
|
||||||
expires = None
|
expires = None
|
||||||
else:
|
else:
|
||||||
duration_s = _duration_in_seconds(duration=duration)
|
duration_s = duration_in_seconds(duration=duration)
|
||||||
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
if key_budget_duration is None: # one-time budget
|
if key_budget_duration is None: # one-time budget
|
||||||
key_reset_at = None
|
key_reset_at = None
|
||||||
else:
|
else:
|
||||||
duration_s = _duration_in_seconds(duration=key_budget_duration)
|
duration_s = duration_in_seconds(duration=key_budget_duration)
|
||||||
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
if budget_duration is None: # one-time budget
|
if budget_duration is None: # one-time budget
|
||||||
reset_at = None
|
reset_at = None
|
||||||
else:
|
else:
|
||||||
duration_s = _duration_in_seconds(duration=budget_duration)
|
duration_s = duration_in_seconds(duration=budget_duration)
|
||||||
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
aliases_json = json.dumps(aliases)
|
aliases_json = json.dumps(aliases)
|
||||||
|
|
|
@ -90,8 +90,8 @@ async def add_team_callbacks(
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
|
|
@ -169,8 +169,8 @@ async def new_team( # noqa: PLR0915
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -289,7 +289,7 @@ async def new_team( # noqa: PLR0915
|
||||||
|
|
||||||
# If budget_duration is set, set `budget_reset_at`
|
# If budget_duration is set, set `budget_reset_at`
|
||||||
if complete_team_data.budget_duration is not None:
|
if complete_team_data.budget_duration is not None:
|
||||||
duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration)
|
duration_s = duration_in_seconds(duration=complete_team_data.budget_duration)
|
||||||
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
complete_team_data.budget_reset_at = reset_at
|
complete_team_data.budget_reset_at = reset_at
|
||||||
|
|
||||||
|
@ -396,8 +396,8 @@ async def update_team(
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.auth.auth_checks import _cache_team_object
|
from litellm.proxy.auth.auth_checks import _cache_team_object
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
proxy_logging_obj,
|
proxy_logging_obj,
|
||||||
|
@ -425,7 +425,7 @@ async def update_team(
|
||||||
|
|
||||||
# Check budget_duration and budget_reset_at
|
# Check budget_duration and budget_reset_at
|
||||||
if data.budget_duration is not None:
|
if data.budget_duration is not None:
|
||||||
duration_s = _duration_in_seconds(duration=data.budget_duration)
|
duration_s = duration_in_seconds(duration=data.budget_duration)
|
||||||
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
# set the budget_reset_at in DB
|
# set the budget_reset_at in DB
|
||||||
|
@ -710,8 +710,8 @@ async def team_member_delete(
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -830,8 +830,8 @@ async def team_member_update(
|
||||||
Update team member budgets
|
Update team member budgets
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -966,8 +966,8 @@ async def delete_team(
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -1055,8 +1055,8 @@ async def team_info(
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -1204,8 +1204,8 @@ async def block_team(
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -1252,8 +1252,8 @@ async def unblock_team(
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
@ -1295,8 +1295,8 @@ async def list_team(
|
||||||
- user_id: str - Optional. If passed will only return teams that the user_id is a member of.
|
- user_id: str - Optional. If passed will only return teams that the user_id is a member of.
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import (
|
from litellm.proxy.proxy_server import (
|
||||||
_duration_in_seconds,
|
|
||||||
create_audit_log_for_update,
|
create_audit_log_for_update,
|
||||||
|
duration_in_seconds,
|
||||||
litellm_proxy_admin_name,
|
litellm_proxy_admin_name,
|
||||||
prisma_client,
|
prisma_client,
|
||||||
)
|
)
|
||||||
|
|
|
@ -68,10 +68,12 @@ async def gemini_proxy_route(
|
||||||
[Docs](https://docs.litellm.ai/docs/pass_through/google_ai_studio)
|
[Docs](https://docs.litellm.ai/docs/pass_through/google_ai_studio)
|
||||||
"""
|
"""
|
||||||
## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
|
## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
|
||||||
api_key = request.query_params.get("key")
|
google_ai_studio_api_key = request.query_params.get("key") or request.headers.get(
|
||||||
|
"x-goog-api-key"
|
||||||
|
)
|
||||||
|
|
||||||
user_api_key_dict = await user_api_key_auth(
|
user_api_key_dict = await user_api_key_auth(
|
||||||
request=request, api_key="Bearer {}".format(api_key)
|
request=request, api_key=f"Bearer {google_ai_studio_api_key}"
|
||||||
)
|
)
|
||||||
|
|
||||||
base_target_url = "https://generativelanguage.googleapis.com"
|
base_target_url = "https://generativelanguage.googleapis.com"
|
||||||
|
|
|
@ -393,6 +393,7 @@ async def pass_through_request( # noqa: PLR0915
|
||||||
_parsed_body=_parsed_body,
|
_parsed_body=_parsed_body,
|
||||||
passthrough_logging_payload=passthrough_logging_payload,
|
passthrough_logging_payload=passthrough_logging_payload,
|
||||||
litellm_call_id=litellm_call_id,
|
litellm_call_id=litellm_call_id,
|
||||||
|
request=request,
|
||||||
)
|
)
|
||||||
# done for supporting 'parallel_request_limiter.py' with pass-through endpoints
|
# done for supporting 'parallel_request_limiter.py' with pass-through endpoints
|
||||||
logging_obj.update_environment_variables(
|
logging_obj.update_environment_variables(
|
||||||
|
@ -528,16 +529,18 @@ async def pass_through_request( # noqa: PLR0915
|
||||||
response_body: Optional[dict] = get_response_body(response)
|
response_body: Optional[dict] = get_response_body(response)
|
||||||
passthrough_logging_payload["response_body"] = response_body
|
passthrough_logging_payload["response_body"] = response_body
|
||||||
end_time = datetime.now()
|
end_time = datetime.now()
|
||||||
await pass_through_endpoint_logging.pass_through_async_success_handler(
|
asyncio.create_task(
|
||||||
httpx_response=response,
|
pass_through_endpoint_logging.pass_through_async_success_handler(
|
||||||
response_body=response_body,
|
httpx_response=response,
|
||||||
url_route=str(url),
|
response_body=response_body,
|
||||||
result="",
|
url_route=str(url),
|
||||||
start_time=start_time,
|
result="",
|
||||||
end_time=end_time,
|
start_time=start_time,
|
||||||
logging_obj=logging_obj,
|
end_time=end_time,
|
||||||
cache_hit=False,
|
logging_obj=logging_obj,
|
||||||
**kwargs,
|
cache_hit=False,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return Response(
|
return Response(
|
||||||
|
@ -572,6 +575,7 @@ async def pass_through_request( # noqa: PLR0915
|
||||||
|
|
||||||
|
|
||||||
def _init_kwargs_for_pass_through_endpoint(
|
def _init_kwargs_for_pass_through_endpoint(
|
||||||
|
request: Request,
|
||||||
user_api_key_dict: UserAPIKeyAuth,
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
passthrough_logging_payload: PassthroughStandardLoggingPayload,
|
passthrough_logging_payload: PassthroughStandardLoggingPayload,
|
||||||
_parsed_body: Optional[dict] = None,
|
_parsed_body: Optional[dict] = None,
|
||||||
|
@ -587,6 +591,12 @@ def _init_kwargs_for_pass_through_endpoint(
|
||||||
}
|
}
|
||||||
if _litellm_metadata:
|
if _litellm_metadata:
|
||||||
_metadata.update(_litellm_metadata)
|
_metadata.update(_litellm_metadata)
|
||||||
|
|
||||||
|
_metadata = _update_metadata_with_tags_in_header(
|
||||||
|
request=request,
|
||||||
|
metadata=_metadata,
|
||||||
|
)
|
||||||
|
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"litellm_params": {
|
"litellm_params": {
|
||||||
"metadata": _metadata,
|
"metadata": _metadata,
|
||||||
|
@ -598,6 +608,18 @@ def _init_kwargs_for_pass_through_endpoint(
|
||||||
return kwargs
|
return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def _update_metadata_with_tags_in_header(request: Request, metadata: dict) -> dict:
|
||||||
|
"""
|
||||||
|
If tags are in the request headers, add them to the metadata
|
||||||
|
|
||||||
|
Used for google and vertex JS SDKs
|
||||||
|
"""
|
||||||
|
_tags = request.headers.get("tags")
|
||||||
|
if _tags:
|
||||||
|
metadata["tags"] = _tags.split(",")
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
def create_pass_through_route(
|
def create_pass_through_route(
|
||||||
endpoint,
|
endpoint,
|
||||||
target: str,
|
target: str,
|
||||||
|
|
|
@ -58,15 +58,17 @@ class PassThroughStreamingHandler:
|
||||||
# After all chunks are processed, handle post-processing
|
# After all chunks are processed, handle post-processing
|
||||||
end_time = datetime.now()
|
end_time = datetime.now()
|
||||||
|
|
||||||
await PassThroughStreamingHandler._route_streaming_logging_to_handler(
|
asyncio.create_task(
|
||||||
litellm_logging_obj=litellm_logging_obj,
|
PassThroughStreamingHandler._route_streaming_logging_to_handler(
|
||||||
passthrough_success_handler_obj=passthrough_success_handler_obj,
|
litellm_logging_obj=litellm_logging_obj,
|
||||||
url_route=url_route,
|
passthrough_success_handler_obj=passthrough_success_handler_obj,
|
||||||
request_body=request_body or {},
|
url_route=url_route,
|
||||||
endpoint_type=endpoint_type,
|
request_body=request_body or {},
|
||||||
start_time=start_time,
|
endpoint_type=endpoint_type,
|
||||||
raw_bytes=raw_bytes,
|
start_time=start_time,
|
||||||
end_time=end_time,
|
raw_bytes=raw_bytes,
|
||||||
|
end_time=end_time,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
|
verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
|
||||||
|
@ -108,9 +110,9 @@ class PassThroughStreamingHandler:
|
||||||
all_chunks=all_chunks,
|
all_chunks=all_chunks,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
)
|
)
|
||||||
standard_logging_response_object = anthropic_passthrough_logging_handler_result[
|
standard_logging_response_object = (
|
||||||
"result"
|
anthropic_passthrough_logging_handler_result["result"]
|
||||||
]
|
)
|
||||||
kwargs = anthropic_passthrough_logging_handler_result["kwargs"]
|
kwargs = anthropic_passthrough_logging_handler_result["kwargs"]
|
||||||
elif endpoint_type == EndpointType.VERTEX_AI:
|
elif endpoint_type == EndpointType.VERTEX_AI:
|
||||||
vertex_passthrough_logging_handler_result = (
|
vertex_passthrough_logging_handler_result = (
|
||||||
|
@ -125,9 +127,9 @@ class PassThroughStreamingHandler:
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
standard_logging_response_object = vertex_passthrough_logging_handler_result[
|
standard_logging_response_object = (
|
||||||
"result"
|
vertex_passthrough_logging_handler_result["result"]
|
||||||
]
|
)
|
||||||
kwargs = vertex_passthrough_logging_handler_result["kwargs"]
|
kwargs = vertex_passthrough_logging_handler_result["kwargs"]
|
||||||
|
|
||||||
if standard_logging_response_object is None:
|
if standard_logging_response_object is None:
|
||||||
|
|
|
@ -18,6 +18,7 @@ from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_stu
|
||||||
from litellm.proxy._types import PassThroughEndpointLoggingResultValues
|
from litellm.proxy._types import PassThroughEndpointLoggingResultValues
|
||||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
from litellm.types.utils import StandardPassThroughResponseObject
|
from litellm.types.utils import StandardPassThroughResponseObject
|
||||||
|
from litellm.utils import executor as thread_pool_executor
|
||||||
|
|
||||||
from .llm_provider_handlers.anthropic_passthrough_logging_handler import (
|
from .llm_provider_handlers.anthropic_passthrough_logging_handler import (
|
||||||
AnthropicPassthroughLoggingHandler,
|
AnthropicPassthroughLoggingHandler,
|
||||||
|
@ -93,15 +94,16 @@ class PassThroughEndpointLogging:
|
||||||
standard_logging_response_object = StandardPassThroughResponseObject(
|
standard_logging_response_object = StandardPassThroughResponseObject(
|
||||||
response=httpx_response.text
|
response=httpx_response.text
|
||||||
)
|
)
|
||||||
threading.Thread(
|
thread_pool_executor.submit(
|
||||||
target=logging_obj.success_handler,
|
logging_obj.success_handler,
|
||||||
args=(
|
args=(
|
||||||
standard_logging_response_object,
|
standard_logging_response_object,
|
||||||
start_time,
|
start_time,
|
||||||
end_time,
|
end_time,
|
||||||
cache_hit,
|
cache_hit,
|
||||||
),
|
),
|
||||||
).start()
|
)
|
||||||
|
|
||||||
await logging_obj.async_success_handler(
|
await logging_obj.async_success_handler(
|
||||||
result=(
|
result=(
|
||||||
json.dumps(result)
|
json.dumps(result)
|
||||||
|
|
|
@ -2,8 +2,24 @@ model_list:
|
||||||
- model_name: gpt-4o
|
- model_name: gpt-4o
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-4o
|
model: openai/gpt-4o
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
- model_name: fake-anthropic-endpoint
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/fake
|
||||||
|
api_base: https://exampleanthropicendpoint-production.up.railway.app/
|
||||||
|
|
||||||
default_vertex_config:
|
router_settings:
|
||||||
vertex_project: "adroit-crow-413218"
|
provider_budget_config:
|
||||||
vertex_location: "us-central1"
|
openai:
|
||||||
|
budget_limit: 0.3 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d
|
||||||
|
anthropic:
|
||||||
|
budget_limit: 5
|
||||||
|
time_period: 1d
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
callbacks: ["prometheus"]
|
||||||
|
success_callback: ["langfuse"]
|
|
@ -182,8 +182,8 @@ from litellm.proxy.management_endpoints.internal_user_endpoints import (
|
||||||
)
|
)
|
||||||
from litellm.proxy.management_endpoints.internal_user_endpoints import user_update
|
from litellm.proxy.management_endpoints.internal_user_endpoints import user_update
|
||||||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||||
_duration_in_seconds,
|
|
||||||
delete_verification_token,
|
delete_verification_token,
|
||||||
|
duration_in_seconds,
|
||||||
generate_key_helper_fn,
|
generate_key_helper_fn,
|
||||||
)
|
)
|
||||||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||||
|
|
|
@ -26,6 +26,11 @@ from typing import (
|
||||||
overload,
|
overload,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from litellm.litellm_core_utils.duration_parser import (
|
||||||
|
_extract_from_regex,
|
||||||
|
duration_in_seconds,
|
||||||
|
get_last_day_of_month,
|
||||||
|
)
|
||||||
from litellm.proxy._types import ProxyErrorTypes, ProxyException
|
from litellm.proxy._types import ProxyErrorTypes, ProxyException
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -2429,86 +2434,6 @@ def _hash_token_if_needed(token: str) -> str:
|
||||||
return token
|
return token
|
||||||
|
|
||||||
|
|
||||||
def _extract_from_regex(duration: str) -> Tuple[int, str]:
|
|
||||||
match = re.match(r"(\d+)(mo|[smhd]?)", duration)
|
|
||||||
|
|
||||||
if not match:
|
|
||||||
raise ValueError("Invalid duration format")
|
|
||||||
|
|
||||||
value, unit = match.groups()
|
|
||||||
value = int(value)
|
|
||||||
|
|
||||||
return value, unit
|
|
||||||
|
|
||||||
|
|
||||||
def get_last_day_of_month(year, month):
|
|
||||||
# Handle December case
|
|
||||||
if month == 12:
|
|
||||||
return 31
|
|
||||||
# Next month is January, so subtract a day from March 1st
|
|
||||||
next_month = datetime(year=year, month=month + 1, day=1)
|
|
||||||
last_day_of_month = (next_month - timedelta(days=1)).day
|
|
||||||
return last_day_of_month
|
|
||||||
|
|
||||||
|
|
||||||
def _duration_in_seconds(duration: str) -> int:
|
|
||||||
"""
|
|
||||||
Parameters:
|
|
||||||
- duration:
|
|
||||||
- "<number>s" - seconds
|
|
||||||
- "<number>m" - minutes
|
|
||||||
- "<number>h" - hours
|
|
||||||
- "<number>d" - days
|
|
||||||
- "<number>mo" - months
|
|
||||||
|
|
||||||
Returns time in seconds till when budget needs to be reset
|
|
||||||
"""
|
|
||||||
value, unit = _extract_from_regex(duration=duration)
|
|
||||||
|
|
||||||
if unit == "s":
|
|
||||||
return value
|
|
||||||
elif unit == "m":
|
|
||||||
return value * 60
|
|
||||||
elif unit == "h":
|
|
||||||
return value * 3600
|
|
||||||
elif unit == "d":
|
|
||||||
return value * 86400
|
|
||||||
elif unit == "mo":
|
|
||||||
now = time.time()
|
|
||||||
current_time = datetime.fromtimestamp(now)
|
|
||||||
|
|
||||||
if current_time.month == 12:
|
|
||||||
target_year = current_time.year + 1
|
|
||||||
target_month = 1
|
|
||||||
else:
|
|
||||||
target_year = current_time.year
|
|
||||||
target_month = current_time.month + value
|
|
||||||
|
|
||||||
# Determine the day to set for next month
|
|
||||||
target_day = current_time.day
|
|
||||||
last_day_of_target_month = get_last_day_of_month(target_year, target_month)
|
|
||||||
|
|
||||||
if target_day > last_day_of_target_month:
|
|
||||||
target_day = last_day_of_target_month
|
|
||||||
|
|
||||||
next_month = datetime(
|
|
||||||
year=target_year,
|
|
||||||
month=target_month,
|
|
||||||
day=target_day,
|
|
||||||
hour=current_time.hour,
|
|
||||||
minute=current_time.minute,
|
|
||||||
second=current_time.second,
|
|
||||||
microsecond=current_time.microsecond,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate the duration until the first day of the next month
|
|
||||||
duration_until_next_month = next_month - current_time
|
|
||||||
return int(duration_until_next_month.total_seconds())
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported duration unit")
|
|
||||||
|
|
||||||
|
|
||||||
async def reset_budget(prisma_client: PrismaClient):
|
async def reset_budget(prisma_client: PrismaClient):
|
||||||
"""
|
"""
|
||||||
Gets all the non-expired keys for a db, which need spend to be reset
|
Gets all the non-expired keys for a db, which need spend to be reset
|
||||||
|
@ -2527,7 +2452,7 @@ async def reset_budget(prisma_client: PrismaClient):
|
||||||
if keys_to_reset is not None and len(keys_to_reset) > 0:
|
if keys_to_reset is not None and len(keys_to_reset) > 0:
|
||||||
for key in keys_to_reset:
|
for key in keys_to_reset:
|
||||||
key.spend = 0.0
|
key.spend = 0.0
|
||||||
duration_s = _duration_in_seconds(duration=key.budget_duration)
|
duration_s = duration_in_seconds(duration=key.budget_duration)
|
||||||
key.budget_reset_at = now + timedelta(seconds=duration_s)
|
key.budget_reset_at = now + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
await prisma_client.update_data(
|
await prisma_client.update_data(
|
||||||
|
@ -2543,7 +2468,7 @@ async def reset_budget(prisma_client: PrismaClient):
|
||||||
if users_to_reset is not None and len(users_to_reset) > 0:
|
if users_to_reset is not None and len(users_to_reset) > 0:
|
||||||
for user in users_to_reset:
|
for user in users_to_reset:
|
||||||
user.spend = 0.0
|
user.spend = 0.0
|
||||||
duration_s = _duration_in_seconds(duration=user.budget_duration)
|
duration_s = duration_in_seconds(duration=user.budget_duration)
|
||||||
user.budget_reset_at = now + timedelta(seconds=duration_s)
|
user.budget_reset_at = now + timedelta(seconds=duration_s)
|
||||||
|
|
||||||
await prisma_client.update_data(
|
await prisma_client.update_data(
|
||||||
|
@ -2561,7 +2486,7 @@ async def reset_budget(prisma_client: PrismaClient):
|
||||||
if teams_to_reset is not None and len(teams_to_reset) > 0:
|
if teams_to_reset is not None and len(teams_to_reset) > 0:
|
||||||
team_reset_requests = []
|
team_reset_requests = []
|
||||||
for team in teams_to_reset:
|
for team in teams_to_reset:
|
||||||
duration_s = _duration_in_seconds(duration=team.budget_duration)
|
duration_s = duration_in_seconds(duration=team.budget_duration)
|
||||||
reset_team_budget_request = ResetTeamBudgetRequest(
|
reset_team_budget_request = ResetTeamBudgetRequest(
|
||||||
team_id=team.team_id,
|
team_id=team.team_id,
|
||||||
spend=0.0,
|
spend=0.0,
|
||||||
|
|
|
@ -28,25 +28,54 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
|
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
|
||||||
create_pass_through_route,
|
create_pass_through_route,
|
||||||
)
|
)
|
||||||
|
from litellm.secret_managers.main import get_secret_str
|
||||||
|
from litellm.types.passthrough_endpoints.vertex_ai import *
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
default_vertex_config = None
|
|
||||||
|
default_vertex_config: VertexPassThroughCredentials = VertexPassThroughCredentials()
|
||||||
|
|
||||||
|
|
||||||
def set_default_vertex_config(config):
|
def _get_vertex_env_vars() -> VertexPassThroughCredentials:
|
||||||
|
"""
|
||||||
|
Helper to get vertex pass through config from environment variables
|
||||||
|
|
||||||
|
The following environment variables are used:
|
||||||
|
- DEFAULT_VERTEXAI_PROJECT (project id)
|
||||||
|
- DEFAULT_VERTEXAI_LOCATION (location)
|
||||||
|
- DEFAULT_GOOGLE_APPLICATION_CREDENTIALS (path to credentials file)
|
||||||
|
"""
|
||||||
|
return VertexPassThroughCredentials(
|
||||||
|
vertex_project=get_secret_str("DEFAULT_VERTEXAI_PROJECT"),
|
||||||
|
vertex_location=get_secret_str("DEFAULT_VERTEXAI_LOCATION"),
|
||||||
|
vertex_credentials=get_secret_str("DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_default_vertex_config(config: Optional[dict] = None):
|
||||||
|
"""Sets vertex configuration from provided config and/or environment variables
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config (Optional[dict]): Configuration dictionary
|
||||||
|
Example: {
|
||||||
|
"vertex_project": "my-project-123",
|
||||||
|
"vertex_location": "us-central1",
|
||||||
|
"vertex_credentials": "os.environ/GOOGLE_CREDS"
|
||||||
|
}
|
||||||
|
"""
|
||||||
global default_vertex_config
|
global default_vertex_config
|
||||||
if config is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
if not isinstance(config, dict):
|
# Initialize config dictionary if None
|
||||||
raise ValueError("invalid config, vertex default config must be a dictionary")
|
if config is None:
|
||||||
|
default_vertex_config = _get_vertex_env_vars()
|
||||||
|
return
|
||||||
|
|
||||||
if isinstance(config, dict):
|
if isinstance(config, dict):
|
||||||
for key, value in config.items():
|
for key, value in config.items():
|
||||||
if isinstance(value, str) and value.startswith("os.environ/"):
|
if isinstance(value, str) and value.startswith("os.environ/"):
|
||||||
config[key] = litellm.get_secret(value)
|
config[key] = litellm.get_secret(value)
|
||||||
|
|
||||||
default_vertex_config = config
|
default_vertex_config = VertexPassThroughCredentials(**config)
|
||||||
|
|
||||||
|
|
||||||
def exception_handler(e: Exception):
|
def exception_handler(e: Exception):
|
||||||
|
@ -116,6 +145,10 @@ def construct_target_url(
|
||||||
"/vertex-ai/{endpoint:path}",
|
"/vertex-ai/{endpoint:path}",
|
||||||
methods=["GET", "POST", "PUT", "DELETE"],
|
methods=["GET", "POST", "PUT", "DELETE"],
|
||||||
tags=["Vertex AI Pass-through", "pass-through"],
|
tags=["Vertex AI Pass-through", "pass-through"],
|
||||||
|
include_in_schema=False,
|
||||||
|
)
|
||||||
|
@router.api_route(
|
||||||
|
"/vertex_ai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"], tags=["Vertex AI Pass-through", "pass-through"]
|
||||||
)
|
)
|
||||||
async def vertex_proxy_route(
|
async def vertex_proxy_route(
|
||||||
endpoint: str,
|
endpoint: str,
|
||||||
|
@ -142,7 +175,7 @@ async def vertex_proxy_route(
|
||||||
vertex_project = None
|
vertex_project = None
|
||||||
vertex_location = None
|
vertex_location = None
|
||||||
# Use headers from the incoming request if default_vertex_config is not set
|
# Use headers from the incoming request if default_vertex_config is not set
|
||||||
if default_vertex_config is None:
|
if default_vertex_config.vertex_project is None:
|
||||||
headers = dict(request.headers) or {}
|
headers = dict(request.headers) or {}
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
"default_vertex_config not set, incoming request headers %s", headers
|
"default_vertex_config not set, incoming request headers %s", headers
|
||||||
|
@ -155,9 +188,9 @@ async def vertex_proxy_route(
|
||||||
headers.pop("content-length", None)
|
headers.pop("content-length", None)
|
||||||
headers.pop("host", None)
|
headers.pop("host", None)
|
||||||
else:
|
else:
|
||||||
vertex_project = default_vertex_config.get("vertex_project")
|
vertex_project = default_vertex_config.vertex_project
|
||||||
vertex_location = default_vertex_config.get("vertex_location")
|
vertex_location = default_vertex_config.vertex_location
|
||||||
vertex_credentials = default_vertex_config.get("vertex_credentials")
|
vertex_credentials = default_vertex_config.vertex_credentials
|
||||||
|
|
||||||
base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"
|
base_target_url = f"https://{vertex_location}-aiplatform.googleapis.com/"
|
||||||
|
|
||||||
|
|
|
@ -18,13 +18,17 @@ anthropic:
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from datetime import datetime, timezone
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_router_logger
|
from litellm._logging import verbose_router_logger
|
||||||
from litellm.caching.caching import DualCache
|
from litellm.caching.caching import DualCache
|
||||||
|
from litellm.caching.redis_cache import RedisPipelineIncrementOperation
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||||
|
from litellm.litellm_core_utils.duration_parser import duration_in_seconds
|
||||||
from litellm.router_utils.cooldown_callbacks import (
|
from litellm.router_utils.cooldown_callbacks import (
|
||||||
_get_prometheus_logger_from_callbacks,
|
_get_prometheus_logger_from_callbacks,
|
||||||
)
|
)
|
||||||
|
@ -43,10 +47,14 @@ if TYPE_CHECKING:
|
||||||
else:
|
else:
|
||||||
Span = Any
|
Span = Any
|
||||||
|
|
||||||
|
DEFAULT_REDIS_SYNC_INTERVAL = 1
|
||||||
|
|
||||||
|
|
||||||
class ProviderBudgetLimiting(CustomLogger):
|
class ProviderBudgetLimiting(CustomLogger):
|
||||||
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
||||||
self.router_cache = router_cache
|
self.router_cache = router_cache
|
||||||
|
self.redis_increment_operation_queue: List[RedisPipelineIncrementOperation] = []
|
||||||
|
asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())
|
||||||
|
|
||||||
# cast elements of provider_budget_config to ProviderBudgetInfo
|
# cast elements of provider_budget_config to ProviderBudgetInfo
|
||||||
for provider, config in provider_budget_config.items():
|
for provider, config in provider_budget_config.items():
|
||||||
|
@ -172,19 +180,76 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
|
|
||||||
return potential_deployments
|
return potential_deployments
|
||||||
|
|
||||||
|
async def _get_or_set_budget_start_time(
|
||||||
|
self, start_time_key: str, current_time: float, ttl_seconds: int
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Checks if the key = `provider_budget_start_time:{provider}` exists in cache.
|
||||||
|
|
||||||
|
If it does, return the value.
|
||||||
|
If it does not, set the key to `current_time` and return the value.
|
||||||
|
"""
|
||||||
|
budget_start = await self.router_cache.async_get_cache(start_time_key)
|
||||||
|
if budget_start is None:
|
||||||
|
await self.router_cache.async_set_cache(
|
||||||
|
key=start_time_key, value=current_time, ttl=ttl_seconds
|
||||||
|
)
|
||||||
|
return current_time
|
||||||
|
return float(budget_start)
|
||||||
|
|
||||||
|
async def _handle_new_budget_window(
|
||||||
|
self,
|
||||||
|
spend_key: str,
|
||||||
|
start_time_key: str,
|
||||||
|
current_time: float,
|
||||||
|
response_cost: float,
|
||||||
|
ttl_seconds: int,
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Handle start of new budget window by resetting spend and start time
|
||||||
|
|
||||||
|
Enters this when:
|
||||||
|
- The budget does not exist in cache, so we need to set it
|
||||||
|
- The budget window has expired, so we need to reset everything
|
||||||
|
|
||||||
|
Does 2 things:
|
||||||
|
- stores key: `provider_spend:{provider}:1d`, value: response_cost
|
||||||
|
- stores key: `provider_budget_start_time:{provider}`, value: current_time.
|
||||||
|
This stores the start time of the new budget window
|
||||||
|
"""
|
||||||
|
await self.router_cache.async_set_cache(
|
||||||
|
key=spend_key, value=response_cost, ttl=ttl_seconds
|
||||||
|
)
|
||||||
|
await self.router_cache.async_set_cache(
|
||||||
|
key=start_time_key, value=current_time, ttl=ttl_seconds
|
||||||
|
)
|
||||||
|
return current_time
|
||||||
|
|
||||||
|
async def _increment_spend_in_current_window(
|
||||||
|
self, spend_key: str, response_cost: float, ttl: int
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Increment spend within existing budget window
|
||||||
|
|
||||||
|
Runs once the budget start time exists in Redis Cache (on the 2nd and subsequent requests to the same provider)
|
||||||
|
|
||||||
|
- Increments the spend in memory cache (so spend instantly updated in memory)
|
||||||
|
- Queues the increment operation to Redis Pipeline (using batched pipeline to optimize performance. Using Redis for multi instance environment of LiteLLM)
|
||||||
|
"""
|
||||||
|
await self.router_cache.in_memory_cache.async_increment(
|
||||||
|
key=spend_key,
|
||||||
|
value=response_cost,
|
||||||
|
ttl=ttl,
|
||||||
|
)
|
||||||
|
increment_op = RedisPipelineIncrementOperation(
|
||||||
|
key=spend_key,
|
||||||
|
increment_value=response_cost,
|
||||||
|
ttl=ttl,
|
||||||
|
)
|
||||||
|
self.redis_increment_operation_queue.append(increment_op)
|
||||||
|
|
||||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
"""
|
"""Original method now uses helper functions"""
|
||||||
Increment provider spend in DualCache (InMemory + Redis)
|
|
||||||
|
|
||||||
Handles saving current provider spend to Redis.
|
|
||||||
|
|
||||||
Spend is stored as:
|
|
||||||
provider_spend:{provider}:{time_period}
|
|
||||||
ex. provider_spend:openai:1d
|
|
||||||
ex. provider_spend:anthropic:7d
|
|
||||||
|
|
||||||
The time period is tracked for time_periods set in the provider budget config.
|
|
||||||
"""
|
|
||||||
verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event")
|
verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event")
|
||||||
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
||||||
"standard_logging_object", None
|
"standard_logging_object", None
|
||||||
|
@ -207,20 +272,146 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
)
|
)
|
||||||
|
|
||||||
spend_key = f"provider_spend:{custom_llm_provider}:{budget_config.time_period}"
|
spend_key = f"provider_spend:{custom_llm_provider}:{budget_config.time_period}"
|
||||||
ttl_seconds = self.get_ttl_seconds(budget_config.time_period)
|
start_time_key = f"provider_budget_start_time:{custom_llm_provider}"
|
||||||
|
|
||||||
|
current_time = datetime.now(timezone.utc).timestamp()
|
||||||
|
ttl_seconds = duration_in_seconds(budget_config.time_period)
|
||||||
|
|
||||||
|
budget_start = await self._get_or_set_budget_start_time(
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=current_time,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
|
||||||
|
if budget_start is None:
|
||||||
|
# First spend for this provider
|
||||||
|
budget_start = await self._handle_new_budget_window(
|
||||||
|
spend_key=spend_key,
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=current_time,
|
||||||
|
response_cost=response_cost,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
elif (current_time - budget_start) > ttl_seconds:
|
||||||
|
# Budget window expired - reset everything
|
||||||
|
verbose_router_logger.debug("Budget window expired - resetting everything")
|
||||||
|
budget_start = await self._handle_new_budget_window(
|
||||||
|
spend_key=spend_key,
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=current_time,
|
||||||
|
response_cost=response_cost,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Within existing window - increment spend
|
||||||
|
remaining_time = ttl_seconds - (current_time - budget_start)
|
||||||
|
ttl_for_increment = int(remaining_time)
|
||||||
|
|
||||||
|
await self._increment_spend_in_current_window(
|
||||||
|
spend_key=spend_key, response_cost=response_cost, ttl=ttl_for_increment
|
||||||
|
)
|
||||||
|
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Incrementing spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
f"Incremented spend for {spend_key} by {response_cost}"
|
||||||
)
|
|
||||||
# Increment the spend in Redis and set TTL
|
|
||||||
await self.router_cache.async_increment_cache(
|
|
||||||
key=spend_key,
|
|
||||||
value=response_cost,
|
|
||||||
ttl=ttl_seconds,
|
|
||||||
)
|
|
||||||
verbose_router_logger.debug(
|
|
||||||
f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def periodic_sync_in_memory_spend_with_redis(self):
|
||||||
|
"""
|
||||||
|
Handler that triggers sync_in_memory_spend_with_redis every DEFAULT_REDIS_SYNC_INTERVAL seconds
|
||||||
|
|
||||||
|
Required for multi-instance environment usage of provider budgets
|
||||||
|
"""
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
await self._sync_in_memory_spend_with_redis()
|
||||||
|
await asyncio.sleep(
|
||||||
|
DEFAULT_REDIS_SYNC_INTERVAL
|
||||||
|
) # Wait for DEFAULT_REDIS_SYNC_INTERVAL seconds before next sync
|
||||||
|
except Exception as e:
|
||||||
|
verbose_router_logger.error(f"Error in periodic sync task: {str(e)}")
|
||||||
|
await asyncio.sleep(
|
||||||
|
DEFAULT_REDIS_SYNC_INTERVAL
|
||||||
|
) # Still wait DEFAULT_REDIS_SYNC_INTERVAL seconds on error before retrying
|
||||||
|
|
||||||
|
async def _push_in_memory_increments_to_redis(self):
|
||||||
|
"""
|
||||||
|
How this works:
|
||||||
|
- async_log_success_event collects all provider spend increments in `redis_increment_operation_queue`
|
||||||
|
- This function pushes all increments to Redis in a batched pipeline to optimize performance
|
||||||
|
|
||||||
|
Only runs if Redis is initialized
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if not self.router_cache.redis_cache:
|
||||||
|
return # Redis is not initialized
|
||||||
|
|
||||||
|
verbose_router_logger.debug(
|
||||||
|
"Pushing Redis Increment Pipeline for queue: %s",
|
||||||
|
self.redis_increment_operation_queue,
|
||||||
|
)
|
||||||
|
if len(self.redis_increment_operation_queue) > 0:
|
||||||
|
asyncio.create_task(
|
||||||
|
self.router_cache.redis_cache.async_increment_pipeline(
|
||||||
|
increment_list=self.redis_increment_operation_queue,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.redis_increment_operation_queue = []
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
verbose_router_logger.error(
|
||||||
|
f"Error syncing in-memory cache with Redis: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _sync_in_memory_spend_with_redis(self):
|
||||||
|
"""
|
||||||
|
Ensures in-memory cache is updated with latest Redis values for all provider spends.
|
||||||
|
|
||||||
|
Why Do we need this?
|
||||||
|
- Optimization to hit sub 100ms latency. Performance was impacted when redis was used for read/write per request
|
||||||
|
- Use provider budgets in multi-instance environment, we use Redis to sync spend across all instances
|
||||||
|
|
||||||
|
What this does:
|
||||||
|
1. Push all provider spend increments to Redis
|
||||||
|
2. Fetch all current provider spend from Redis to update in-memory cache
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
# No need to sync if Redis cache is not initialized
|
||||||
|
if self.router_cache.redis_cache is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# 1. Push all provider spend increments to Redis
|
||||||
|
await self._push_in_memory_increments_to_redis()
|
||||||
|
|
||||||
|
# 2. Fetch all current provider spend from Redis to update in-memory cache
|
||||||
|
cache_keys = []
|
||||||
|
for provider, config in self.provider_budget_config.items():
|
||||||
|
if config is None:
|
||||||
|
continue
|
||||||
|
cache_keys.append(f"provider_spend:{provider}:{config.time_period}")
|
||||||
|
|
||||||
|
# Batch fetch current spend values from Redis
|
||||||
|
redis_values = await self.router_cache.redis_cache.async_batch_get_cache(
|
||||||
|
key_list=cache_keys
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update in-memory cache with Redis values
|
||||||
|
if isinstance(redis_values, dict): # Check if redis_values is a dictionary
|
||||||
|
for key, value in redis_values.items():
|
||||||
|
if value is not None:
|
||||||
|
await self.router_cache.in_memory_cache.async_set_cache(
|
||||||
|
key=key, value=float(value)
|
||||||
|
)
|
||||||
|
verbose_router_logger.debug(
|
||||||
|
f"Updated in-memory cache for {key}: {value}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
verbose_router_logger.error(
|
||||||
|
f"Error syncing in-memory cache with Redis: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
def _get_budget_config_for_provider(
|
def _get_budget_config_for_provider(
|
||||||
self, provider: str
|
self, provider: str
|
||||||
) -> Optional[ProviderBudgetInfo]:
|
) -> Optional[ProviderBudgetInfo]:
|
||||||
|
@ -242,15 +433,6 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
return None
|
return None
|
||||||
return custom_llm_provider
|
return custom_llm_provider
|
||||||
|
|
||||||
def get_ttl_seconds(self, time_period: str) -> int:
|
|
||||||
"""
|
|
||||||
Convert time period (e.g., '1d', '30d') to seconds for Redis TTL
|
|
||||||
"""
|
|
||||||
if time_period.endswith("d"):
|
|
||||||
days = int(time_period[:-1])
|
|
||||||
return days * 24 * 60 * 60
|
|
||||||
raise ValueError(f"Unsupported time period format: {time_period}")
|
|
||||||
|
|
||||||
def _track_provider_remaining_budget_prometheus(
|
def _track_provider_remaining_budget_prometheus(
|
||||||
self, provider: str, spend: float, budget_limit: float
|
self, provider: str, spend: float, budget_limit: float
|
||||||
):
|
):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Literal
|
from typing import Literal, Optional, TypedDict
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMCacheType(str, Enum):
|
class LiteLLMCacheType(str, Enum):
|
||||||
|
@ -23,3 +23,13 @@ CachingSupportedCallTypes = Literal[
|
||||||
"arerank",
|
"arerank",
|
||||||
"rerank",
|
"rerank",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class RedisPipelineIncrementOperation(TypedDict):
|
||||||
|
"""
|
||||||
|
TypeDict for 1 Redis Pipeline Increment Operation
|
||||||
|
"""
|
||||||
|
|
||||||
|
key: str
|
||||||
|
increment_value: float
|
||||||
|
ttl: Optional[int]
|
||||||
|
|
18
litellm/types/passthrough_endpoints/vertex_ai.py
Normal file
18
litellm/types/passthrough_endpoints/vertex_ai.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
"""
|
||||||
|
Used for /vertex_ai/ pass through endpoints
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class VertexPassThroughCredentials(BaseModel):
|
||||||
|
# Example: vertex_project = "my-project-123"
|
||||||
|
vertex_project: Optional[str] = None
|
||||||
|
|
||||||
|
# Example: vertex_location = "us-central1"
|
||||||
|
vertex_location: Optional[str] = None
|
||||||
|
|
||||||
|
# Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS"
|
||||||
|
vertex_credentials: Optional[str] = None
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "1.52.15"
|
version = "1.52.16"
|
||||||
description = "Library to easily interface with LLM API providers"
|
description = "Library to easily interface with LLM API providers"
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[tool.commitizen]
|
[tool.commitizen]
|
||||||
version = "1.52.15"
|
version = "1.52.16"
|
||||||
version_files = [
|
version_files = [
|
||||||
"pyproject.toml:^version"
|
"pyproject.toml:^version"
|
||||||
]
|
]
|
||||||
|
|
|
@ -45,7 +45,9 @@ print(env_keys)
|
||||||
# Parse the documentation to extract documented keys
|
# Parse the documentation to extract documented keys
|
||||||
repo_base = "./"
|
repo_base = "./"
|
||||||
print(os.listdir(repo_base))
|
print(os.listdir(repo_base))
|
||||||
docs_path = "./docs/my-website/docs/proxy/configs.md" # Path to the documentation
|
docs_path = (
|
||||||
|
"../../docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||||
|
)
|
||||||
documented_keys = set()
|
documented_keys = set()
|
||||||
try:
|
try:
|
||||||
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
||||||
|
|
|
@ -34,7 +34,9 @@ for root, dirs, files in os.walk(repo_base):
|
||||||
# Parse the documentation to extract documented keys
|
# Parse the documentation to extract documented keys
|
||||||
repo_base = "./"
|
repo_base = "./"
|
||||||
print(os.listdir(repo_base))
|
print(os.listdir(repo_base))
|
||||||
docs_path = "./docs/my-website/docs/proxy/configs.md" # Path to the documentation
|
docs_path = (
|
||||||
|
"./docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||||
|
)
|
||||||
documented_keys = set()
|
documented_keys = set()
|
||||||
try:
|
try:
|
||||||
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
||||||
|
|
|
@ -2433,3 +2433,48 @@ async def test_dual_cache_caching_batch_get_cache():
|
||||||
await dc.async_batch_get_cache(keys=["test_key1", "test_key2"])
|
await dc.async_batch_get_cache(keys=["test_key1", "test_key2"])
|
||||||
|
|
||||||
assert mock_async_get_cache.call_count == 1
|
assert mock_async_get_cache.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_redis_increment_pipeline():
|
||||||
|
"""Test Redis increment pipeline functionality"""
|
||||||
|
try:
|
||||||
|
from litellm.caching.redis_cache import RedisCache
|
||||||
|
|
||||||
|
litellm.set_verbose = True
|
||||||
|
redis_cache = RedisCache(
|
||||||
|
host=os.environ["REDIS_HOST"],
|
||||||
|
port=os.environ["REDIS_PORT"],
|
||||||
|
password=os.environ["REDIS_PASSWORD"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create test increment operations
|
||||||
|
increment_list = [
|
||||||
|
{"key": "test_key1", "increment_value": 1.5, "ttl": 60},
|
||||||
|
{"key": "test_key1", "increment_value": 1.1, "ttl": 58},
|
||||||
|
{"key": "test_key1", "increment_value": 0.4, "ttl": 55},
|
||||||
|
{"key": "test_key2", "increment_value": 2.5, "ttl": 60},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Test pipeline increment
|
||||||
|
results = await redis_cache.async_increment_pipeline(increment_list)
|
||||||
|
|
||||||
|
# Verify results
|
||||||
|
assert len(results) == 8 # 4 increment operations + 4 expire operations
|
||||||
|
|
||||||
|
# Verify the values were actually set in Redis
|
||||||
|
value1 = await redis_cache.async_get_cache("test_key1")
|
||||||
|
print("result in cache for key=test_key1", value1)
|
||||||
|
value2 = await redis_cache.async_get_cache("test_key2")
|
||||||
|
print("result in cache for key=test_key2", value2)
|
||||||
|
|
||||||
|
assert float(value1) == 3.0
|
||||||
|
assert float(value2) == 2.5
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await redis_cache.async_delete_cache("test_key1")
|
||||||
|
await redis_cache.async_delete_cache("test_key2")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error occurred: {str(e)}")
|
||||||
|
raise e
|
||||||
|
|
|
@ -99,3 +99,29 @@ def test_caching_router():
|
||||||
|
|
||||||
|
|
||||||
# test_caching_router()
|
# test_caching_router()
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_redis_with_ssl():
|
||||||
|
"""
|
||||||
|
Test connecting to redis connection pool when ssl=None
|
||||||
|
|
||||||
|
|
||||||
|
Relevant issue:
|
||||||
|
User was seeing this error: `TypeError: AbstractConnection.__init__() got an unexpected keyword argument 'ssl'`
|
||||||
|
"""
|
||||||
|
from litellm._redis import get_redis_connection_pool, get_redis_async_client
|
||||||
|
|
||||||
|
# Get the connection pool with SSL
|
||||||
|
# REDIS_HOST_WITH_SSL is just a redis cloud instance with Transport layer security (TLS) enabled
|
||||||
|
pool = get_redis_connection_pool(
|
||||||
|
host=os.environ.get("REDIS_HOST_WITH_SSL"),
|
||||||
|
port=os.environ.get("REDIS_PORT_WITH_SSL"),
|
||||||
|
password=os.environ.get("REDIS_PASSWORD_WITH_SSL"),
|
||||||
|
ssl=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create Redis client with the pool
|
||||||
|
redis_client = get_redis_async_client(connection_pool=pool)
|
||||||
|
|
||||||
|
print("pinging redis")
|
||||||
|
print(await redis_client.ping())
|
||||||
|
print("pinged redis")
|
||||||
|
|
|
@ -17,7 +17,7 @@ from litellm.types.router import (
|
||||||
ProviderBudgetConfigType,
|
ProviderBudgetConfigType,
|
||||||
ProviderBudgetInfo,
|
ProviderBudgetInfo,
|
||||||
)
|
)
|
||||||
from litellm.caching.caching import DualCache
|
from litellm.caching.caching import DualCache, RedisCache
|
||||||
import logging
|
import logging
|
||||||
from litellm._logging import verbose_router_logger
|
from litellm._logging import verbose_router_logger
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -25,6 +25,27 @@ import litellm
|
||||||
verbose_router_logger.setLevel(logging.DEBUG)
|
verbose_router_logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_redis():
|
||||||
|
"""Cleanup Redis cache before each test"""
|
||||||
|
try:
|
||||||
|
import redis
|
||||||
|
|
||||||
|
print("cleaning up redis..")
|
||||||
|
|
||||||
|
redis_client = redis.Redis(
|
||||||
|
host=os.getenv("REDIS_HOST"),
|
||||||
|
port=int(os.getenv("REDIS_PORT")),
|
||||||
|
password=os.getenv("REDIS_PASSWORD"),
|
||||||
|
)
|
||||||
|
print("scan iter result", redis_client.scan_iter("provider_spend:*"))
|
||||||
|
# Delete all provider spend keys
|
||||||
|
for key in redis_client.scan_iter("provider_spend:*"):
|
||||||
|
print("deleting key", key)
|
||||||
|
redis_client.delete(key)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error cleaning up Redis: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_provider_budgets_e2e_test():
|
async def test_provider_budgets_e2e_test():
|
||||||
"""
|
"""
|
||||||
|
@ -34,6 +55,8 @@ async def test_provider_budgets_e2e_test():
|
||||||
- Next 3 requests all go to Azure
|
- Next 3 requests all go to Azure
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
# Modify for test
|
||||||
provider_budget_config: ProviderBudgetConfigType = {
|
provider_budget_config: ProviderBudgetConfigType = {
|
||||||
"openai": ProviderBudgetInfo(time_period="1d", budget_limit=0.000000000001),
|
"openai": ProviderBudgetInfo(time_period="1d", budget_limit=0.000000000001),
|
||||||
"azure": ProviderBudgetInfo(time_period="1d", budget_limit=100),
|
"azure": ProviderBudgetInfo(time_period="1d", budget_limit=100),
|
||||||
|
@ -71,7 +94,7 @@ async def test_provider_budgets_e2e_test():
|
||||||
)
|
)
|
||||||
print(response)
|
print(response)
|
||||||
|
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(2.5)
|
||||||
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
response = await router.acompletion(
|
response = await router.acompletion(
|
||||||
|
@ -94,6 +117,7 @@ async def test_provider_budgets_e2e_test_expect_to_fail():
|
||||||
- first request passes, all subsequent requests fail
|
- first request passes, all subsequent requests fail
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
|
||||||
# Note: We intentionally use a dictionary with string keys for budget_limit and time_period
|
# Note: We intentionally use a dictionary with string keys for budget_limit and time_period
|
||||||
# we want to test that the router can handle type conversion, since the proxy config yaml passes these values as a dictionary
|
# we want to test that the router can handle type conversion, since the proxy config yaml passes these values as a dictionary
|
||||||
|
@ -125,7 +149,7 @@ async def test_provider_budgets_e2e_test_expect_to_fail():
|
||||||
)
|
)
|
||||||
print(response)
|
print(response)
|
||||||
|
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(2.5)
|
||||||
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
with pytest.raises(Exception) as exc_info:
|
with pytest.raises(Exception) as exc_info:
|
||||||
|
@ -142,28 +166,13 @@ async def test_provider_budgets_e2e_test_expect_to_fail():
|
||||||
assert "Exceeded budget for provider" in str(exc_info.value)
|
assert "Exceeded budget for provider" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
def test_get_ttl_seconds():
|
@pytest.mark.asyncio
|
||||||
"""
|
async def test_get_llm_provider_for_deployment():
|
||||||
Test the get_ttl_seconds helper method"
|
|
||||||
|
|
||||||
"""
|
|
||||||
provider_budget = ProviderBudgetLimiting(
|
|
||||||
router_cache=DualCache(), provider_budget_config={}
|
|
||||||
)
|
|
||||||
|
|
||||||
assert provider_budget.get_ttl_seconds("1d") == 86400 # 1 day in seconds
|
|
||||||
assert provider_budget.get_ttl_seconds("7d") == 604800 # 7 days in seconds
|
|
||||||
assert provider_budget.get_ttl_seconds("30d") == 2592000 # 30 days in seconds
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="Unsupported time period format"):
|
|
||||||
provider_budget.get_ttl_seconds("1h")
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_llm_provider_for_deployment():
|
|
||||||
"""
|
"""
|
||||||
Test the _get_llm_provider_for_deployment helper method
|
Test the _get_llm_provider_for_deployment helper method
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
cleanup_redis()
|
||||||
provider_budget = ProviderBudgetLimiting(
|
provider_budget = ProviderBudgetLimiting(
|
||||||
router_cache=DualCache(), provider_budget_config={}
|
router_cache=DualCache(), provider_budget_config={}
|
||||||
)
|
)
|
||||||
|
@ -189,11 +198,13 @@ def test_get_llm_provider_for_deployment():
|
||||||
assert provider_budget._get_llm_provider_for_deployment(unknown_deployment) is None
|
assert provider_budget._get_llm_provider_for_deployment(unknown_deployment) is None
|
||||||
|
|
||||||
|
|
||||||
def test_get_budget_config_for_provider():
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_budget_config_for_provider():
|
||||||
"""
|
"""
|
||||||
Test the _get_budget_config_for_provider helper method
|
Test the _get_budget_config_for_provider helper method
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
cleanup_redis()
|
||||||
config = {
|
config = {
|
||||||
"openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
|
"openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
|
||||||
"anthropic": ProviderBudgetInfo(time_period="7d", budget_limit=500),
|
"anthropic": ProviderBudgetInfo(time_period="7d", budget_limit=500),
|
||||||
|
@ -223,6 +234,7 @@ async def test_prometheus_metric_tracking():
|
||||||
"""
|
"""
|
||||||
Test that the Prometheus metric for provider budget is tracked correctly
|
Test that the Prometheus metric for provider budget is tracked correctly
|
||||||
"""
|
"""
|
||||||
|
cleanup_redis()
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
from litellm.integrations.prometheus import PrometheusLogger
|
from litellm.integrations.prometheus import PrometheusLogger
|
||||||
|
|
||||||
|
@ -280,7 +292,187 @@ async def test_prometheus_metric_tracking():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("error", e)
|
print("error", e)
|
||||||
|
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(2.5)
|
||||||
|
|
||||||
# Verify the mock was called correctly
|
# Verify the mock was called correctly
|
||||||
mock_prometheus.track_provider_remaining_budget.assert_called_once()
|
mock_prometheus.track_provider_remaining_budget.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_handle_new_budget_window():
|
||||||
|
"""
|
||||||
|
Test _handle_new_budget_window helper method
|
||||||
|
|
||||||
|
Current
|
||||||
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
provider_budget = ProviderBudgetLimiting(
|
||||||
|
router_cache=DualCache(), provider_budget_config={}
|
||||||
|
)
|
||||||
|
|
||||||
|
spend_key = "provider_spend:openai:7d"
|
||||||
|
start_time_key = "provider_budget_start_time:openai"
|
||||||
|
current_time = 1000.0
|
||||||
|
response_cost = 0.5
|
||||||
|
ttl_seconds = 86400 # 1 day
|
||||||
|
|
||||||
|
# Test handling new budget window
|
||||||
|
new_start_time = await provider_budget._handle_new_budget_window(
|
||||||
|
spend_key=spend_key,
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=current_time,
|
||||||
|
response_cost=response_cost,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert new_start_time == current_time
|
||||||
|
|
||||||
|
# Verify the spend was set correctly
|
||||||
|
spend = await provider_budget.router_cache.async_get_cache(spend_key)
|
||||||
|
print("spend in cache for key", spend_key, "is", spend)
|
||||||
|
assert float(spend) == response_cost
|
||||||
|
|
||||||
|
# Verify start time was set correctly
|
||||||
|
start_time = await provider_budget.router_cache.async_get_cache(start_time_key)
|
||||||
|
print("start time in cache for key", start_time_key, "is", start_time)
|
||||||
|
assert float(start_time) == current_time
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_or_set_budget_start_time():
|
||||||
|
"""
|
||||||
|
Test _get_or_set_budget_start_time helper method
|
||||||
|
|
||||||
|
scenario 1: no existing start time in cache, should return current time
|
||||||
|
scenario 2: existing start time in cache, should return existing start time
|
||||||
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
provider_budget = ProviderBudgetLimiting(
|
||||||
|
router_cache=DualCache(), provider_budget_config={}
|
||||||
|
)
|
||||||
|
|
||||||
|
start_time_key = "test_start_time"
|
||||||
|
current_time = 1000.0
|
||||||
|
ttl_seconds = 86400 # 1 day
|
||||||
|
|
||||||
|
# When there is no existing start time, we should set it to the current time
|
||||||
|
start_time = await provider_budget._get_or_set_budget_start_time(
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=current_time,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
print("budget start time when no existing start time is in cache", start_time)
|
||||||
|
assert start_time == current_time
|
||||||
|
|
||||||
|
# When there is an existing start time, we should return it even if the current time is later
|
||||||
|
new_current_time = 2000.0
|
||||||
|
existing_start_time = await provider_budget._get_or_set_budget_start_time(
|
||||||
|
start_time_key=start_time_key,
|
||||||
|
current_time=new_current_time,
|
||||||
|
ttl_seconds=ttl_seconds,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
"budget start time when existing start time is in cache, but current time is later",
|
||||||
|
existing_start_time,
|
||||||
|
)
|
||||||
|
assert existing_start_time == current_time # Should return the original start time
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_increment_spend_in_current_window():
|
||||||
|
"""
|
||||||
|
Test _increment_spend_in_current_window helper method
|
||||||
|
|
||||||
|
Expected behavior:
|
||||||
|
- Increment the spend in memory cache
|
||||||
|
- Queue the increment operation to Redis
|
||||||
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
provider_budget = ProviderBudgetLimiting(
|
||||||
|
router_cache=DualCache(), provider_budget_config={}
|
||||||
|
)
|
||||||
|
|
||||||
|
spend_key = "provider_spend:openai:1d"
|
||||||
|
response_cost = 0.5
|
||||||
|
ttl = 86400 # 1 day
|
||||||
|
|
||||||
|
# Set initial spend
|
||||||
|
await provider_budget.router_cache.async_set_cache(
|
||||||
|
key=spend_key, value=1.0, ttl=ttl
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test incrementing spend
|
||||||
|
await provider_budget._increment_spend_in_current_window(
|
||||||
|
spend_key=spend_key,
|
||||||
|
response_cost=response_cost,
|
||||||
|
ttl=ttl,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the spend was incremented correctly in memory
|
||||||
|
spend = await provider_budget.router_cache.async_get_cache(spend_key)
|
||||||
|
assert float(spend) == 1.5
|
||||||
|
|
||||||
|
# Verify the increment operation was queued for Redis
|
||||||
|
print(
|
||||||
|
"redis_increment_operation_queue",
|
||||||
|
provider_budget.redis_increment_operation_queue,
|
||||||
|
)
|
||||||
|
assert len(provider_budget.redis_increment_operation_queue) == 1
|
||||||
|
queued_op = provider_budget.redis_increment_operation_queue[0]
|
||||||
|
assert queued_op["key"] == spend_key
|
||||||
|
assert queued_op["increment_value"] == response_cost
|
||||||
|
assert queued_op["ttl"] == ttl
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_sync_in_memory_spend_with_redis():
|
||||||
|
"""
|
||||||
|
Test _sync_in_memory_spend_with_redis helper method
|
||||||
|
|
||||||
|
Expected behavior:
|
||||||
|
- Push all provider spend increments to Redis
|
||||||
|
- Fetch all current provider spend from Redis to update in-memory cache
|
||||||
|
"""
|
||||||
|
cleanup_redis()
|
||||||
|
provider_budget_config = {
|
||||||
|
"openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
|
||||||
|
"anthropic": ProviderBudgetInfo(time_period="1d", budget_limit=200),
|
||||||
|
}
|
||||||
|
|
||||||
|
provider_budget = ProviderBudgetLimiting(
|
||||||
|
router_cache=DualCache(
|
||||||
|
redis_cache=RedisCache(
|
||||||
|
host=os.getenv("REDIS_HOST"),
|
||||||
|
port=int(os.getenv("REDIS_PORT")),
|
||||||
|
password=os.getenv("REDIS_PASSWORD"),
|
||||||
|
)
|
||||||
|
),
|
||||||
|
provider_budget_config=provider_budget_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set some values in Redis
|
||||||
|
spend_key_openai = "provider_spend:openai:1d"
|
||||||
|
spend_key_anthropic = "provider_spend:anthropic:1d"
|
||||||
|
|
||||||
|
await provider_budget.router_cache.redis_cache.async_set_cache(
|
||||||
|
key=spend_key_openai, value=50.0
|
||||||
|
)
|
||||||
|
await provider_budget.router_cache.redis_cache.async_set_cache(
|
||||||
|
key=spend_key_anthropic, value=75.0
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test syncing with Redis
|
||||||
|
await provider_budget._sync_in_memory_spend_with_redis()
|
||||||
|
|
||||||
|
# Verify in-memory cache was updated
|
||||||
|
openai_spend = await provider_budget.router_cache.in_memory_cache.async_get_cache(
|
||||||
|
spend_key_openai
|
||||||
|
)
|
||||||
|
anthropic_spend = (
|
||||||
|
await provider_budget.router_cache.in_memory_cache.async_get_cache(
|
||||||
|
spend_key_anthropic
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert float(openai_spend) == 50.0
|
||||||
|
assert float(anthropic_spend) == 75.0
|
||||||
|
|
|
@ -17,7 +17,7 @@ import pytest
|
||||||
import litellm
|
import litellm
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, headers
|
||||||
from litellm.proxy.utils import (
|
from litellm.proxy.utils import (
|
||||||
_duration_in_seconds,
|
duration_in_seconds,
|
||||||
_extract_from_regex,
|
_extract_from_regex,
|
||||||
get_last_day_of_month,
|
get_last_day_of_month,
|
||||||
)
|
)
|
||||||
|
@ -593,7 +593,7 @@ def test_duration_in_seconds():
|
||||||
duration_until_next_month = next_month - current_time
|
duration_until_next_month = next_month - current_time
|
||||||
expected_duration = int(duration_until_next_month.total_seconds())
|
expected_duration = int(duration_until_next_month.total_seconds())
|
||||||
|
|
||||||
value = _duration_in_seconds(duration="1mo")
|
value = duration_in_seconds(duration="1mo")
|
||||||
|
|
||||||
assert value - expected_duration < 2
|
assert value - expected_duration < 2
|
||||||
|
|
||||||
|
|
|
@ -141,7 +141,9 @@ async def test_anthropic_basic_completion_with_headers():
|
||||||
), "Start time should be before end time"
|
), "Start time should be before end time"
|
||||||
|
|
||||||
# Metadata assertions
|
# Metadata assertions
|
||||||
assert log_entry["cache_hit"] == "False", "Cache should be off"
|
assert (
|
||||||
|
str(log_entry["cache_hit"]).lower() != "true"
|
||||||
|
), "Cache should be off"
|
||||||
assert log_entry["request_tags"] == [
|
assert log_entry["request_tags"] == [
|
||||||
"test-tag-1",
|
"test-tag-1",
|
||||||
"test-tag-2",
|
"test-tag-2",
|
||||||
|
@ -251,7 +253,9 @@ async def test_anthropic_streaming_with_headers():
|
||||||
), "Start time should be before end time"
|
), "Start time should be before end time"
|
||||||
|
|
||||||
# Metadata assertions
|
# Metadata assertions
|
||||||
assert log_entry["cache_hit"] == "False", "Cache should be off"
|
assert (
|
||||||
|
str(log_entry["cache_hit"]).lower() != "true"
|
||||||
|
), "Cache should be off"
|
||||||
assert log_entry["request_tags"] == [
|
assert log_entry["request_tags"] == [
|
||||||
"test-tag-stream-1",
|
"test-tag-stream-1",
|
||||||
"test-tag-stream-2",
|
"test-tag-stream-2",
|
||||||
|
|
123
tests/pass_through_tests/test_gemini_with_spend.test.js
Normal file
123
tests/pass_through_tests/test_gemini_with_spend.test.js
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
const { GoogleGenerativeAI } = require("@google/generative-ai");
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
// Import fetch if the SDK uses it
|
||||||
|
const originalFetch = global.fetch || require('node-fetch');
|
||||||
|
|
||||||
|
let lastCallId;
|
||||||
|
|
||||||
|
// Monkey-patch the fetch used internally
|
||||||
|
global.fetch = async function patchedFetch(url, options) {
|
||||||
|
const response = await originalFetch(url, options);
|
||||||
|
|
||||||
|
// Store the call ID if it exists
|
||||||
|
lastCallId = response.headers.get('x-litellm-call-id');
|
||||||
|
|
||||||
|
return response;
|
||||||
|
};
|
||||||
|
|
||||||
|
describe('Gemini AI Tests', () => {
|
||||||
|
test('should successfully generate non-streaming content with tags', async () => {
|
||||||
|
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
baseUrl: 'http://127.0.0.1:4000/gemini',
|
||||||
|
customHeaders: {
|
||||||
|
"tags": "gemini-js-sdk,pass-through-endpoint"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const model = genAI.getGenerativeModel({
|
||||||
|
model: 'gemini-pro'
|
||||||
|
}, requestOptions);
|
||||||
|
|
||||||
|
const prompt = 'Say "hello test" and nothing else';
|
||||||
|
|
||||||
|
const result = await model.generateContent(prompt);
|
||||||
|
expect(result).toBeDefined();
|
||||||
|
|
||||||
|
// Use the captured callId
|
||||||
|
const callId = lastCallId;
|
||||||
|
console.log("Captured Call ID:", callId);
|
||||||
|
|
||||||
|
// Wait for spend to be logged
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 15000));
|
||||||
|
|
||||||
|
// Check spend logs
|
||||||
|
const spendResponse = await fetch(
|
||||||
|
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Authorization': 'Bearer sk-1234'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const spendData = await spendResponse.json();
|
||||||
|
console.log("spendData", spendData)
|
||||||
|
expect(spendData).toBeDefined();
|
||||||
|
expect(spendData[0].request_id).toBe(callId);
|
||||||
|
expect(spendData[0].call_type).toBe('pass_through_endpoint');
|
||||||
|
expect(spendData[0].request_tags).toEqual(['gemini-js-sdk', 'pass-through-endpoint']);
|
||||||
|
expect(spendData[0].metadata).toHaveProperty('user_api_key');
|
||||||
|
expect(spendData[0].model).toContain('gemini');
|
||||||
|
expect(spendData[0].spend).toBeGreaterThan(0);
|
||||||
|
}, 25000);
|
||||||
|
|
||||||
|
test('should successfully generate streaming content with tags', async () => {
|
||||||
|
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
baseUrl: 'http://127.0.0.1:4000/gemini',
|
||||||
|
customHeaders: {
|
||||||
|
"tags": "gemini-js-sdk,pass-through-endpoint"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const model = genAI.getGenerativeModel({
|
||||||
|
model: 'gemini-pro'
|
||||||
|
}, requestOptions);
|
||||||
|
|
||||||
|
const prompt = 'Say "hello test" and nothing else';
|
||||||
|
|
||||||
|
const streamingResult = await model.generateContentStream(prompt);
|
||||||
|
expect(streamingResult).toBeDefined();
|
||||||
|
|
||||||
|
for await (const chunk of streamingResult.stream) {
|
||||||
|
console.log('stream chunk:', JSON.stringify(chunk));
|
||||||
|
expect(chunk).toBeDefined();
|
||||||
|
}
|
||||||
|
|
||||||
|
const aggregatedResponse = await streamingResult.response;
|
||||||
|
console.log('aggregated response:', JSON.stringify(aggregatedResponse));
|
||||||
|
expect(aggregatedResponse).toBeDefined();
|
||||||
|
|
||||||
|
// Use the captured callId
|
||||||
|
const callId = lastCallId;
|
||||||
|
console.log("Captured Call ID:", callId);
|
||||||
|
|
||||||
|
// Wait for spend to be logged
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 15000));
|
||||||
|
|
||||||
|
// Check spend logs
|
||||||
|
const spendResponse = await fetch(
|
||||||
|
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Authorization': 'Bearer sk-1234'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const spendData = await spendResponse.json();
|
||||||
|
console.log("spendData", spendData)
|
||||||
|
expect(spendData).toBeDefined();
|
||||||
|
expect(spendData[0].request_id).toBe(callId);
|
||||||
|
expect(spendData[0].call_type).toBe('pass_through_endpoint');
|
||||||
|
expect(spendData[0].request_tags).toEqual(['gemini-js-sdk', 'pass-through-endpoint']);
|
||||||
|
expect(spendData[0].metadata).toHaveProperty('user_api_key');
|
||||||
|
expect(spendData[0].model).toContain('gemini');
|
||||||
|
expect(spendData[0].spend).toBeGreaterThan(0);
|
||||||
|
}, 25000);
|
||||||
|
});
|
55
tests/pass_through_tests/test_local_gemini.js
Normal file
55
tests/pass_through_tests/test_local_gemini.js
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
const { GoogleGenerativeAI, ModelParams, RequestOptions } = require("@google/generative-ai");
|
||||||
|
|
||||||
|
const modelParams = {
|
||||||
|
model: 'gemini-pro',
|
||||||
|
};
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
baseUrl: 'http://127.0.0.1:4000/gemini',
|
||||||
|
customHeaders: {
|
||||||
|
"tags": "gemini-js-sdk,gemini-pro"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const genAI = new GoogleGenerativeAI("sk-1234"); // litellm proxy API key
|
||||||
|
const model = genAI.getGenerativeModel(modelParams, requestOptions);
|
||||||
|
|
||||||
|
const testPrompt = "Explain how AI works";
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log("making request")
|
||||||
|
try {
|
||||||
|
const result = await model.generateContent(testPrompt);
|
||||||
|
console.log(result.response.text());
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error details:', {
|
||||||
|
name: error.name,
|
||||||
|
message: error.message,
|
||||||
|
cause: error.cause,
|
||||||
|
// Check if there's a network error
|
||||||
|
isNetworkError: error instanceof TypeError && error.message === 'fetch failed'
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check if the server is running
|
||||||
|
if (error instanceof TypeError && error.message === 'fetch failed') {
|
||||||
|
console.error('Make sure your local server is running at http://localhost:4000');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async function main_streaming() {
|
||||||
|
try {
|
||||||
|
const streamingResult = await model.generateContentStream(testPrompt);
|
||||||
|
for await (const item of streamingResult.stream) {
|
||||||
|
console.log('stream chunk: ', JSON.stringify(item));
|
||||||
|
}
|
||||||
|
const aggregatedResponse = await streamingResult.response;
|
||||||
|
console.log('aggregated response: ', JSON.stringify(aggregatedResponse));
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error details:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// main();
|
||||||
|
main_streaming();
|
|
@ -1,31 +1,22 @@
|
||||||
const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
|
const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
|
||||||
|
|
||||||
|
|
||||||
// Import fetch if the SDK uses it
|
|
||||||
const originalFetch = global.fetch || require('node-fetch');
|
|
||||||
|
|
||||||
// Monkey-patch the fetch used internally
|
|
||||||
global.fetch = async function patchedFetch(url, options) {
|
|
||||||
// Modify the URL to use HTTP instead of HTTPS
|
|
||||||
if (url.startsWith('https://localhost:4000')) {
|
|
||||||
url = url.replace('https://', 'http://');
|
|
||||||
}
|
|
||||||
console.log('Patched fetch sending request to:', url);
|
|
||||||
return originalFetch(url, options);
|
|
||||||
};
|
|
||||||
|
|
||||||
const vertexAI = new VertexAI({
|
const vertexAI = new VertexAI({
|
||||||
project: 'adroit-crow-413218',
|
project: 'adroit-crow-413218',
|
||||||
location: 'us-central1',
|
location: 'us-central1',
|
||||||
apiEndpoint: "localhost:4000/vertex-ai"
|
apiEndpoint: "127.0.0.1:4000/vertex-ai"
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Create customHeaders using Headers
|
||||||
|
const customHeaders = new Headers({
|
||||||
|
"X-Litellm-Api-Key": "sk-1234",
|
||||||
|
tags: "vertexjs,test-2"
|
||||||
|
});
|
||||||
|
|
||||||
// Use customHeaders in RequestOptions
|
// Use customHeaders in RequestOptions
|
||||||
const requestOptions = {
|
const requestOptions = {
|
||||||
customHeaders: new Headers({
|
customHeaders: customHeaders,
|
||||||
"x-litellm-api-key": "sk-1234"
|
|
||||||
})
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const generativeModel = vertexAI.getGenerativeModel(
|
const generativeModel = vertexAI.getGenerativeModel(
|
||||||
|
@ -33,7 +24,7 @@ const generativeModel = vertexAI.getGenerativeModel(
|
||||||
requestOptions
|
requestOptions
|
||||||
);
|
);
|
||||||
|
|
||||||
async function streamingResponse() {
|
async function testModel() {
|
||||||
try {
|
try {
|
||||||
const request = {
|
const request = {
|
||||||
contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
|
contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
|
||||||
|
@ -49,20 +40,4 @@ async function streamingResponse() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testModel();
|
||||||
async function nonStreamingResponse() {
|
|
||||||
try {
|
|
||||||
const request = {
|
|
||||||
contents: [{role: 'user', parts: [{text: 'How are you doing today tell me your name?'}]}],
|
|
||||||
};
|
|
||||||
const response = await generativeModel.generateContent(request);
|
|
||||||
console.log('non streaming response: ', JSON.stringify(response));
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Error:', error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
streamingResponse();
|
|
||||||
nonStreamingResponse();
|
|
|
@ -99,7 +99,7 @@ async def test_basic_vertex_ai_pass_through_with_spendlog():
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project="adroit-crow-413218",
|
project="adroit-crow-413218",
|
||||||
location="us-central1",
|
location="us-central1",
|
||||||
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
|
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
|
||||||
api_transport="rest",
|
api_transport="rest",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ async def test_basic_vertex_ai_pass_through_streaming_with_spendlog():
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project="adroit-crow-413218",
|
project="adroit-crow-413218",
|
||||||
location="us-central1",
|
location="us-central1",
|
||||||
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
|
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
|
||||||
api_transport="rest",
|
api_transport="rest",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ async def test_vertex_ai_pass_through_endpoint_context_caching():
|
||||||
vertexai.init(
|
vertexai.init(
|
||||||
project="adroit-crow-413218",
|
project="adroit-crow-413218",
|
||||||
location="us-central1",
|
location="us-central1",
|
||||||
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex-ai",
|
api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai",
|
||||||
api_transport="rest",
|
api_transport="rest",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
194
tests/pass_through_tests/test_vertex_with_spend.test.js
Normal file
194
tests/pass_through_tests/test_vertex_with_spend.test.js
Normal file
|
@ -0,0 +1,194 @@
|
||||||
|
const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const os = require('os');
|
||||||
|
const { writeFileSync } = require('fs');
|
||||||
|
|
||||||
|
|
||||||
|
// Import fetch if the SDK uses it
|
||||||
|
const originalFetch = global.fetch || require('node-fetch');
|
||||||
|
|
||||||
|
let lastCallId;
|
||||||
|
|
||||||
|
// Monkey-patch the fetch used internally
|
||||||
|
global.fetch = async function patchedFetch(url, options) {
|
||||||
|
// Modify the URL to use HTTP instead of HTTPS
|
||||||
|
if (url.startsWith('https://127.0.0.1:4000')) {
|
||||||
|
url = url.replace('https://', 'http://');
|
||||||
|
}
|
||||||
|
console.log('Patched fetch sending request to:', url);
|
||||||
|
|
||||||
|
const response = await originalFetch(url, options);
|
||||||
|
|
||||||
|
// Store the call ID if it exists
|
||||||
|
lastCallId = response.headers.get('x-litellm-call-id');
|
||||||
|
|
||||||
|
return response;
|
||||||
|
};
|
||||||
|
|
||||||
|
function loadVertexAiCredentials() {
|
||||||
|
console.log("loading vertex ai credentials");
|
||||||
|
const filepath = path.dirname(__filename);
|
||||||
|
const vertexKeyPath = path.join(filepath, "vertex_key.json");
|
||||||
|
|
||||||
|
// Initialize default empty service account data
|
||||||
|
let serviceAccountKeyData = {};
|
||||||
|
|
||||||
|
// Try to read existing vertex_key.json
|
||||||
|
try {
|
||||||
|
const content = fs.readFileSync(vertexKeyPath, 'utf8');
|
||||||
|
if (content && content.trim()) {
|
||||||
|
serviceAccountKeyData = JSON.parse(content);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// File doesn't exist or is invalid, continue with empty object
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update with environment variables
|
||||||
|
const privateKeyId = process.env.VERTEX_AI_PRIVATE_KEY_ID || "";
|
||||||
|
const privateKey = (process.env.VERTEX_AI_PRIVATE_KEY || "").replace(/\\n/g, "\n");
|
||||||
|
|
||||||
|
serviceAccountKeyData.private_key_id = privateKeyId;
|
||||||
|
serviceAccountKeyData.private_key = privateKey;
|
||||||
|
|
||||||
|
// Create temporary file
|
||||||
|
const tempFilePath = path.join(os.tmpdir(), `vertex-credentials-${Date.now()}.json`);
|
||||||
|
writeFileSync(tempFilePath, JSON.stringify(serviceAccountKeyData, null, 2));
|
||||||
|
|
||||||
|
// Set environment variable
|
||||||
|
process.env.GOOGLE_APPLICATION_CREDENTIALS = tempFilePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run credential loading before tests
|
||||||
|
beforeAll(() => {
|
||||||
|
loadVertexAiCredentials();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
describe('Vertex AI Tests', () => {
|
||||||
|
test('should successfully generate non-streaming content with tags', async () => {
|
||||||
|
const vertexAI = new VertexAI({
|
||||||
|
project: 'adroit-crow-413218',
|
||||||
|
location: 'us-central1',
|
||||||
|
apiEndpoint: "127.0.0.1:4000/vertex_ai"
|
||||||
|
});
|
||||||
|
|
||||||
|
const customHeaders = new Headers({
|
||||||
|
"x-litellm-api-key": "sk-1234",
|
||||||
|
"tags": "vertex-js-sdk,pass-through-endpoint"
|
||||||
|
});
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
customHeaders: customHeaders
|
||||||
|
};
|
||||||
|
|
||||||
|
const generativeModel = vertexAI.getGenerativeModel(
|
||||||
|
{ model: 'gemini-1.0-pro' },
|
||||||
|
requestOptions
|
||||||
|
);
|
||||||
|
|
||||||
|
const request = {
|
||||||
|
contents: [{role: 'user', parts: [{text: 'Say "hello test" and nothing else'}]}]
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await generativeModel.generateContent(request);
|
||||||
|
expect(result).toBeDefined();
|
||||||
|
|
||||||
|
// Use the captured callId
|
||||||
|
const callId = lastCallId;
|
||||||
|
console.log("Captured Call ID:", callId);
|
||||||
|
|
||||||
|
// Wait for spend to be logged
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 15000));
|
||||||
|
|
||||||
|
// Check spend logs
|
||||||
|
const spendResponse = await fetch(
|
||||||
|
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Authorization': 'Bearer sk-1234'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const spendData = await spendResponse.json();
|
||||||
|
console.log("spendData", spendData)
|
||||||
|
expect(spendData).toBeDefined();
|
||||||
|
expect(spendData[0].request_id).toBe(callId);
|
||||||
|
expect(spendData[0].call_type).toBe('pass_through_endpoint');
|
||||||
|
expect(spendData[0].request_tags).toEqual(['vertex-js-sdk', 'pass-through-endpoint']);
|
||||||
|
expect(spendData[0].metadata).toHaveProperty('user_api_key');
|
||||||
|
expect(spendData[0].model).toContain('gemini');
|
||||||
|
expect(spendData[0].spend).toBeGreaterThan(0);
|
||||||
|
}, 25000);
|
||||||
|
|
||||||
|
test('should successfully generate streaming content with tags', async () => {
|
||||||
|
const vertexAI = new VertexAI({
|
||||||
|
project: 'adroit-crow-413218',
|
||||||
|
location: 'us-central1',
|
||||||
|
apiEndpoint: "127.0.0.1:4000/vertex_ai"
|
||||||
|
});
|
||||||
|
|
||||||
|
const customHeaders = new Headers({
|
||||||
|
"x-litellm-api-key": "sk-1234",
|
||||||
|
"tags": "vertex-js-sdk,pass-through-endpoint"
|
||||||
|
});
|
||||||
|
|
||||||
|
const requestOptions = {
|
||||||
|
customHeaders: customHeaders
|
||||||
|
};
|
||||||
|
|
||||||
|
const generativeModel = vertexAI.getGenerativeModel(
|
||||||
|
{ model: 'gemini-1.0-pro' },
|
||||||
|
requestOptions
|
||||||
|
);
|
||||||
|
|
||||||
|
const request = {
|
||||||
|
contents: [{role: 'user', parts: [{text: 'Say "hello test" and nothing else'}]}]
|
||||||
|
};
|
||||||
|
|
||||||
|
const streamingResult = await generativeModel.generateContentStream(request);
|
||||||
|
expect(streamingResult).toBeDefined();
|
||||||
|
|
||||||
|
|
||||||
|
// Add some assertions
|
||||||
|
expect(streamingResult).toBeDefined();
|
||||||
|
|
||||||
|
for await (const item of streamingResult.stream) {
|
||||||
|
console.log('stream chunk:', JSON.stringify(item));
|
||||||
|
expect(item).toBeDefined();
|
||||||
|
}
|
||||||
|
|
||||||
|
const aggregatedResponse = await streamingResult.response;
|
||||||
|
console.log('aggregated response:', JSON.stringify(aggregatedResponse));
|
||||||
|
expect(aggregatedResponse).toBeDefined();
|
||||||
|
|
||||||
|
// Use the captured callId
|
||||||
|
const callId = lastCallId;
|
||||||
|
console.log("Captured Call ID:", callId);
|
||||||
|
|
||||||
|
// Wait for spend to be logged
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 15000));
|
||||||
|
|
||||||
|
// Check spend logs
|
||||||
|
const spendResponse = await fetch(
|
||||||
|
`http://127.0.0.1:4000/spend/logs?request_id=${callId}`,
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Authorization': 'Bearer sk-1234'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const spendData = await spendResponse.json();
|
||||||
|
console.log("spendData", spendData)
|
||||||
|
expect(spendData).toBeDefined();
|
||||||
|
expect(spendData[0].request_id).toBe(callId);
|
||||||
|
expect(spendData[0].call_type).toBe('pass_through_endpoint');
|
||||||
|
expect(spendData[0].request_tags).toEqual(['vertex-js-sdk', 'pass-through-endpoint']);
|
||||||
|
expect(spendData[0].metadata).toHaveProperty('user_api_key');
|
||||||
|
expect(spendData[0].model).toContain('gemini');
|
||||||
|
expect(spendData[0].spend).toBeGreaterThan(0);
|
||||||
|
}, 25000);
|
||||||
|
});
|
322
tests/pass_through_unit_tests/test_pass_through_unit_tests.py
Normal file
322
tests/pass_through_unit_tests/test_pass_through_unit_tests.py
Normal file
|
@ -0,0 +1,322 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from unittest.mock import AsyncMock, Mock, patch, MagicMock
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
|
||||||
|
import fastapi
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
import litellm
|
||||||
|
from typing import AsyncGenerator
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||||
|
from litellm.proxy.pass_through_endpoints.types import EndpointType
|
||||||
|
from litellm.proxy.pass_through_endpoints.success_handler import (
|
||||||
|
PassThroughEndpointLogging,
|
||||||
|
)
|
||||||
|
from litellm.proxy.pass_through_endpoints.streaming_handler import (
|
||||||
|
PassThroughStreamingHandler,
|
||||||
|
)
|
||||||
|
|
||||||
|
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
|
||||||
|
pass_through_request,
|
||||||
|
)
|
||||||
|
from fastapi import Request
|
||||||
|
from litellm.proxy._types import UserAPIKeyAuth
|
||||||
|
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
|
||||||
|
_init_kwargs_for_pass_through_endpoint,
|
||||||
|
_update_metadata_with_tags_in_header,
|
||||||
|
)
|
||||||
|
from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_request():
|
||||||
|
# Create a mock request with headers
|
||||||
|
class QueryParams:
|
||||||
|
def __init__(self):
|
||||||
|
self._dict = {}
|
||||||
|
|
||||||
|
class MockRequest:
|
||||||
|
def __init__(
|
||||||
|
self, headers=None, method="POST", request_body: Optional[dict] = None
|
||||||
|
):
|
||||||
|
self.headers = headers or {}
|
||||||
|
self.query_params = QueryParams()
|
||||||
|
self.method = method
|
||||||
|
self.request_body = request_body or {}
|
||||||
|
|
||||||
|
async def body(self) -> bytes:
|
||||||
|
return bytes(json.dumps(self.request_body), "utf-8")
|
||||||
|
|
||||||
|
return MockRequest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_user_api_key_dict():
|
||||||
|
return UserAPIKeyAuth(
|
||||||
|
api_key="test-key",
|
||||||
|
user_id="test-user",
|
||||||
|
team_id="test-team",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_metadata_with_tags_in_header_no_tags(mock_request):
|
||||||
|
"""
|
||||||
|
No tags should be added to metadata if they do not exist in headers
|
||||||
|
"""
|
||||||
|
# Test when no tags are present in headers
|
||||||
|
request = mock_request(headers={})
|
||||||
|
metadata = {"existing": "value"}
|
||||||
|
|
||||||
|
result = _update_metadata_with_tags_in_header(request=request, metadata=metadata)
|
||||||
|
|
||||||
|
assert result == {"existing": "value"}
|
||||||
|
assert "tags" not in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_metadata_with_tags_in_header_with_tags(mock_request):
|
||||||
|
"""
|
||||||
|
Tags should be added to metadata if they exist in headers
|
||||||
|
"""
|
||||||
|
# Test when tags are present in headers
|
||||||
|
request = mock_request(headers={"tags": "tag1,tag2,tag3"})
|
||||||
|
metadata = {"existing": "value"}
|
||||||
|
|
||||||
|
result = _update_metadata_with_tags_in_header(request=request, metadata=metadata)
|
||||||
|
|
||||||
|
assert result == {"existing": "value", "tags": ["tag1", "tag2", "tag3"]}
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_kwargs_for_pass_through_endpoint_basic(
|
||||||
|
mock_request, mock_user_api_key_dict
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Basic test for init_kwargs_for_pass_through_endpoint
|
||||||
|
|
||||||
|
- metadata should contain user_api_key, user_api_key_user_id, user_api_key_team_id, user_api_key_end_user_id from `mock_user_api_key_dict`
|
||||||
|
"""
|
||||||
|
request = mock_request()
|
||||||
|
passthrough_payload = PassthroughStandardLoggingPayload(
|
||||||
|
url="https://test.com",
|
||||||
|
request_body={},
|
||||||
|
)
|
||||||
|
|
||||||
|
result = _init_kwargs_for_pass_through_endpoint(
|
||||||
|
request=request,
|
||||||
|
user_api_key_dict=mock_user_api_key_dict,
|
||||||
|
passthrough_logging_payload=passthrough_payload,
|
||||||
|
litellm_call_id="test-call-id",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["call_type"] == "pass_through_endpoint"
|
||||||
|
assert result["litellm_call_id"] == "test-call-id"
|
||||||
|
assert result["passthrough_logging_payload"] == passthrough_payload
|
||||||
|
|
||||||
|
# Check metadata
|
||||||
|
expected_metadata = {
|
||||||
|
"user_api_key": "test-key",
|
||||||
|
"user_api_key_user_id": "test-user",
|
||||||
|
"user_api_key_team_id": "test-team",
|
||||||
|
"user_api_key_end_user_id": "test-user",
|
||||||
|
}
|
||||||
|
assert result["litellm_params"]["metadata"] == expected_metadata
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_kwargs_with_litellm_metadata(mock_request, mock_user_api_key_dict):
|
||||||
|
"""
|
||||||
|
Expected behavior: litellm_metadata should be merged with default metadata
|
||||||
|
|
||||||
|
see usage example here: https://docs.litellm.ai/docs/pass_through/anthropic_completion#send-litellm_metadata-tags
|
||||||
|
"""
|
||||||
|
request = mock_request()
|
||||||
|
parsed_body = {
|
||||||
|
"litellm_metadata": {"custom_field": "custom_value", "tags": ["tag1", "tag2"]}
|
||||||
|
}
|
||||||
|
passthrough_payload = PassthroughStandardLoggingPayload(
|
||||||
|
url="https://test.com",
|
||||||
|
request_body={},
|
||||||
|
)
|
||||||
|
|
||||||
|
result = _init_kwargs_for_pass_through_endpoint(
|
||||||
|
request=request,
|
||||||
|
user_api_key_dict=mock_user_api_key_dict,
|
||||||
|
passthrough_logging_payload=passthrough_payload,
|
||||||
|
_parsed_body=parsed_body,
|
||||||
|
litellm_call_id="test-call-id",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that litellm_metadata was merged with default metadata
|
||||||
|
metadata = result["litellm_params"]["metadata"]
|
||||||
|
print("metadata", metadata)
|
||||||
|
assert metadata["custom_field"] == "custom_value"
|
||||||
|
assert metadata["tags"] == ["tag1", "tag2"]
|
||||||
|
assert metadata["user_api_key"] == "test-key"
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_kwargs_with_tags_in_header(mock_request, mock_user_api_key_dict):
|
||||||
|
"""
|
||||||
|
Tags should be added to metadata if they exist in headers
|
||||||
|
"""
|
||||||
|
request = mock_request(headers={"tags": "tag1,tag2"})
|
||||||
|
passthrough_payload = PassthroughStandardLoggingPayload(
|
||||||
|
url="https://test.com",
|
||||||
|
request_body={},
|
||||||
|
)
|
||||||
|
|
||||||
|
result = _init_kwargs_for_pass_through_endpoint(
|
||||||
|
request=request,
|
||||||
|
user_api_key_dict=mock_user_api_key_dict,
|
||||||
|
passthrough_logging_payload=passthrough_payload,
|
||||||
|
litellm_call_id="test-call-id",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that tags were added to metadata
|
||||||
|
metadata = result["litellm_params"]["metadata"]
|
||||||
|
print("metadata", metadata)
|
||||||
|
assert metadata["tags"] == ["tag1", "tag2"]
|
||||||
|
|
||||||
|
|
||||||
|
athropic_request_body = {
|
||||||
|
"model": "claude-3-5-sonnet-20241022",
|
||||||
|
"max_tokens": 256,
|
||||||
|
"messages": [{"role": "user", "content": "Hello, world tell me 2 sentences "}],
|
||||||
|
"litellm_metadata": {"tags": ["hi", "hello"]},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_pass_through_request_logging_failure(
|
||||||
|
mock_request, mock_user_api_key_dict
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Test that pass_through_request still returns a response even if logging raises an Exception
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Mock the logging handler to raise an error
|
||||||
|
async def mock_logging_failure(*args, **kwargs):
|
||||||
|
raise Exception("Logging failed!")
|
||||||
|
|
||||||
|
# Create a mock response
|
||||||
|
mock_response = AsyncMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.headers = {"content-type": "application/json"}
|
||||||
|
|
||||||
|
# Add mock content
|
||||||
|
mock_response._content = b'{"mock": "response"}'
|
||||||
|
|
||||||
|
async def mock_aread():
|
||||||
|
return mock_response._content
|
||||||
|
|
||||||
|
mock_response.aread = mock_aread
|
||||||
|
|
||||||
|
# Patch both the logging handler and the httpx client
|
||||||
|
with patch(
|
||||||
|
"litellm.proxy.pass_through_endpoints.pass_through_endpoints.PassThroughEndpointLogging.pass_through_async_success_handler",
|
||||||
|
new=mock_logging_failure,
|
||||||
|
), patch(
|
||||||
|
"httpx.AsyncClient.send",
|
||||||
|
return_value=mock_response,
|
||||||
|
), patch(
|
||||||
|
"httpx.AsyncClient.request",
|
||||||
|
return_value=mock_response,
|
||||||
|
):
|
||||||
|
request = mock_request(
|
||||||
|
headers={}, method="POST", request_body=athropic_request_body
|
||||||
|
)
|
||||||
|
response = await pass_through_request(
|
||||||
|
request=request,
|
||||||
|
target="https://exampleopenaiendpoint-production.up.railway.app/v1/messages",
|
||||||
|
custom_headers={},
|
||||||
|
user_api_key_dict=mock_user_api_key_dict,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert response was returned successfully despite logging failure
|
||||||
|
assert response.status_code == 200
|
||||||
|
|
||||||
|
# Verify we got the mock response content
|
||||||
|
if hasattr(response, "body"):
|
||||||
|
content = response.body
|
||||||
|
else:
|
||||||
|
content = await response.aread()
|
||||||
|
|
||||||
|
assert content == b'{"mock": "response"}'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_pass_through_request_logging_failure_with_stream(
|
||||||
|
mock_request, mock_user_api_key_dict
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Test that pass_through_request still returns a response even if logging raises an Exception
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Mock the logging handler to raise an error
|
||||||
|
async def mock_logging_failure(*args, **kwargs):
|
||||||
|
raise Exception("Logging failed!")
|
||||||
|
|
||||||
|
# Create a mock response
|
||||||
|
mock_response = AsyncMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
|
||||||
|
# Add headers property to mock response
|
||||||
|
mock_response.headers = {
|
||||||
|
"content-type": "application/json", # Not streaming
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create mock chunks for streaming
|
||||||
|
mock_chunks = [b'{"chunk": 1}', b'{"chunk": 2}']
|
||||||
|
mock_response.body_iterator = AsyncMock()
|
||||||
|
mock_response.body_iterator.__aiter__.return_value = mock_chunks
|
||||||
|
|
||||||
|
# Add aread method to mock response
|
||||||
|
mock_response._content = b'{"mock": "response"}'
|
||||||
|
|
||||||
|
async def mock_aread():
|
||||||
|
return mock_response._content
|
||||||
|
|
||||||
|
mock_response.aread = mock_aread
|
||||||
|
|
||||||
|
# Patch both the logging handler and the httpx client
|
||||||
|
with patch(
|
||||||
|
"litellm.proxy.pass_through_endpoints.streaming_handler.PassThroughStreamingHandler._route_streaming_logging_to_handler",
|
||||||
|
new=mock_logging_failure,
|
||||||
|
), patch(
|
||||||
|
"httpx.AsyncClient.send",
|
||||||
|
return_value=mock_response,
|
||||||
|
), patch(
|
||||||
|
"httpx.AsyncClient.request",
|
||||||
|
return_value=mock_response,
|
||||||
|
):
|
||||||
|
request = mock_request(
|
||||||
|
headers={}, method="POST", request_body=athropic_request_body
|
||||||
|
)
|
||||||
|
response = await pass_through_request(
|
||||||
|
request=request,
|
||||||
|
target="https://exampleopenaiendpoint-production.up.railway.app/v1/messages",
|
||||||
|
custom_headers={},
|
||||||
|
user_api_key_dict=mock_user_api_key_dict,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert response was returned successfully despite logging failure
|
||||||
|
assert response.status_code == 200
|
||||||
|
|
||||||
|
# For non-streaming responses, we can access the content directly
|
||||||
|
if hasattr(response, "body"):
|
||||||
|
content = response.body
|
||||||
|
else:
|
||||||
|
# For streaming responses, we need to read the chunks
|
||||||
|
chunks = []
|
||||||
|
async for chunk in response.body_iterator:
|
||||||
|
chunks.append(chunk)
|
||||||
|
content = b"".join(chunks)
|
||||||
|
|
||||||
|
# Verify we got some response content
|
||||||
|
assert content is not None
|
||||||
|
if isinstance(content, bytes):
|
||||||
|
assert len(content) > 0
|
|
@ -18,6 +18,10 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
||||||
from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
|
from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
|
||||||
get_litellm_virtual_key,
|
get_litellm_virtual_key,
|
||||||
vertex_proxy_route,
|
vertex_proxy_route,
|
||||||
|
_get_vertex_env_vars,
|
||||||
|
set_default_vertex_config,
|
||||||
|
VertexPassThroughCredentials,
|
||||||
|
default_vertex_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,3 +86,84 @@ async def test_vertex_proxy_route_api_key_auth():
|
||||||
mock_auth.assert_called_once()
|
mock_auth.assert_called_once()
|
||||||
call_args = mock_auth.call_args[1]
|
call_args = mock_auth.call_args[1]
|
||||||
assert call_args["api_key"] == "Bearer test-key-123"
|
assert call_args["api_key"] == "Bearer test-key-123"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_vertex_env_vars():
|
||||||
|
"""Test that _get_vertex_env_vars correctly reads environment variables"""
|
||||||
|
# Set environment variables for the test
|
||||||
|
os.environ["DEFAULT_VERTEXAI_PROJECT"] = "test-project-123"
|
||||||
|
os.environ["DEFAULT_VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/creds"
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = _get_vertex_env_vars()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# Verify the result
|
||||||
|
assert isinstance(result, VertexPassThroughCredentials)
|
||||||
|
assert result.vertex_project == "test-project-123"
|
||||||
|
assert result.vertex_location == "us-central1"
|
||||||
|
assert result.vertex_credentials == "/path/to/creds"
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up environment variables
|
||||||
|
del os.environ["DEFAULT_VERTEXAI_PROJECT"]
|
||||||
|
del os.environ["DEFAULT_VERTEXAI_LOCATION"]
|
||||||
|
del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_set_default_vertex_config():
|
||||||
|
"""Test set_default_vertex_config with various inputs"""
|
||||||
|
# Test with None config - set environment variables first
|
||||||
|
os.environ["DEFAULT_VERTEXAI_PROJECT"] = "env-project"
|
||||||
|
os.environ["DEFAULT_VERTEXAI_LOCATION"] = "env-location"
|
||||||
|
os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"] = "env-creds"
|
||||||
|
os.environ["GOOGLE_CREDS"] = "secret-creds"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Test with None config
|
||||||
|
set_default_vertex_config()
|
||||||
|
from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
|
||||||
|
default_vertex_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert default_vertex_config.vertex_project == "env-project"
|
||||||
|
assert default_vertex_config.vertex_location == "env-location"
|
||||||
|
assert default_vertex_config.vertex_credentials == "env-creds"
|
||||||
|
|
||||||
|
# Test with valid config.yaml settings on vertex_config
|
||||||
|
test_config = {
|
||||||
|
"vertex_project": "my-project-123",
|
||||||
|
"vertex_location": "us-central1",
|
||||||
|
"vertex_credentials": "path/to/creds",
|
||||||
|
}
|
||||||
|
set_default_vertex_config(test_config)
|
||||||
|
from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
|
||||||
|
default_vertex_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert default_vertex_config.vertex_project == "my-project-123"
|
||||||
|
assert default_vertex_config.vertex_location == "us-central1"
|
||||||
|
assert default_vertex_config.vertex_credentials == "path/to/creds"
|
||||||
|
|
||||||
|
# Test with environment variable reference
|
||||||
|
test_config = {
|
||||||
|
"vertex_project": "my-project-123",
|
||||||
|
"vertex_location": "us-central1",
|
||||||
|
"vertex_credentials": "os.environ/GOOGLE_CREDS",
|
||||||
|
}
|
||||||
|
set_default_vertex_config(test_config)
|
||||||
|
from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
|
||||||
|
default_vertex_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert default_vertex_config.vertex_credentials == "secret-creds"
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up environment variables
|
||||||
|
del os.environ["DEFAULT_VERTEXAI_PROJECT"]
|
||||||
|
del os.environ["DEFAULT_VERTEXAI_LOCATION"]
|
||||||
|
del os.environ["DEFAULT_GOOGLE_APPLICATION_CREDENTIALS"]
|
||||||
|
del os.environ["GOOGLE_CREDS"]
|
||||||
|
|
1
ui/litellm-dashboard/out/404.html
Normal file
1
ui/litellm-dashboard/out/404.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
|
|
@ -0,0 +1 @@
|
||||||
|
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(e,n,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(e){e.exports={style:{fontFamily:"'__Inter_86ef86', '__Inter_Fallback_86ef86'",fontStyle:"normal"},className:"__className_86ef86"}}},function(e){e.O(0,[971,69,744],function(){return e(e.s=87421)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(47907),i=t(2179),r=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),f=t(777),p=t(37963),j=t(60620),_=t(13565);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. 
accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,902,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[888],{41597:function(n,_,u){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_app",function(){return u(57174)}])}},function(n){var _=function(_){return n(n.s=_)};n.O(0,[774,179],function(){return _(41597),_(94546)}),_N_E=n.O()}]);
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[820],{81981:function(n,_,u){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_error",function(){return u(5103)}])}},function(n){n.O(0,[888,774,179],function(){return n(n.s=81981)}),_N_E=n.O()}]);
|
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue