(docs) router settings - on litellm config (#6037)

* add yaml with all router settings
* add docs for router settings
* add docs for router settings and litellm settings

Parent 0c9c42915f · Commit 6d1de8e1ee · 1 changed file with 86 additions and 64 deletions

Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`master_key`) on the config.yaml.

| Param Name | Description |
|----------------------|---------------------------------------------------------------|
| `model_list` | List of supported models on the server, with model-specific configs |
| `router_settings` | litellm Router settings, example `routing_strategy="least-busy"` [**see all**](#router-settings) |
| `litellm_settings` | litellm Module settings, example `litellm.drop_params=True`, `litellm.set_verbose=True`, `litellm.api_base`, `litellm.cache` [**see all**](#all-settings) |
| `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
| `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |
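
For orientation, here is a minimal config.yaml sketch that uses all of these sections together. The model alias, API key reference, and Redis values below are illustrative placeholders, not defaults:

```yaml
model_list:
  - model_name: gpt-4o-alias                # hypothetical alias clients will request
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY    # resolved from the environment at runtime

litellm_settings:
  drop_params: true                         # module-level litellm.drop_params=True

router_settings:
  routing_strategy: least-busy

general_settings:
  master_key: sk-my_special_key             # example value from the table above

environment_variables:
  REDIS_HOST: localhost
  REDIS_PORT: "6379"
```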

These prompt-template fields sit under a deployment's `litellm_params` in `model_list`:

```yaml
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```
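
For context, a sketch of a complete `model_list` entry carrying these fields; the alias and `api_base` are hypothetical placeholders, and the `roles` mapping from the snippet above is omitted here for brevity:

```yaml
model_list:
  - model_name: my-chatml-model             # hypothetical alias
    litellm_params:
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1
      api_base: https://your-hf-endpoint    # placeholder endpoint
      initial_prompt_value: "\n"
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```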

## **All settings**

```yaml
environment_variables: {}

model_list:
  - model_name: string
    litellm_params: {}
    model_info:
      id: string
      mode: embedding
      input_cost_per_token: 0
      output_cost_per_token: 0
      max_tokens: 2048
      base_model: gpt-4-1106-preview
      additionalProp1: {}

litellm_settings:
  success_callback: ["langfuse"] # list of success callbacks
  failure_callback: ["sentry"] # list of failure callbacks
  callbacks: ["otel"] # list of callbacks - runs on success and failure
  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
  turn_off_message_logging: boolean # prevent messages and responses from being logged to your callbacks; request metadata will still be logged
  redact_user_api_key_info: boolean # redact information about the user api key (hashed token, user_id, team id, etc.) from logs; currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging

callback_settings:
  otel:
    message_logging: boolean # OTEL logging callback specific settings

general_settings:
  completion_model: string
  disable_spend_logs: boolean # turn off writing each transaction to the db
  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
  disable_reset_budget: boolean # turn off reset budget scheduled task
  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
  allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
  key_management_system: google_kms # either google_kms or azure_kms
  master_key: string
  database_url: string
  database_connection_pool_limit: 0 # default 100
  database_connection_timeout: 0 # default 60s
  custom_auth: string
  max_parallel_requests: 0 # the max parallel requests allowed per deployment
  global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
  infer_model_from_keys: true
  background_health_checks: true
  health_check_interval: 300
  alerting: ["slack", "email"]
  alerting_threshold: 0
  use_client_credentials_pass_through_routes: boolean # use client credentials for all pass-through routes like "/vertex-ai", "/bedrock". When true, Virtual Key auth is not applied on these endpoints. See https://docs.litellm.ai/docs/pass_through/vertex_ai
```
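
As a worked example, a `litellm_settings` sketch that reports successes to Langfuse and failures to Sentry while keeping message content out of the logs (assumes the Langfuse and Sentry credentials are set as environment variables elsewhere):

```yaml
litellm_settings:
  success_callback: ["langfuse"]
  failure_callback: ["sentry"]
  turn_off_message_logging: true # request/response bodies are dropped from callback logs; metadata is still logged
  redact_user_api_key_info: true # hashed token, user_id, team id are redacted from supported loggers
```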
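
Similarly, a hardened `general_settings` sketch for a production-style deployment; the master key and database URL are placeholders and would normally come from environment variables:

```yaml
general_settings:
  master_key: sk-my_special_key # placeholder
  database_url: postgresql://user:password@host:5432/litellm # placeholder
  disable_spend_logs: true # skip writing per-request transactions to the db
  background_health_checks: true
  health_check_interval: 300 # seconds between background health checks
  alerting: ["slack"]
```
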
### Router Settings

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
  redis_host: <your-redis-host> # string
  redis_password: <your-redis-password> # string
  redis_port: <your-redis-port> # string
  enable_pre_call_check: true # bool - before a call is made, check that the request fits within the model's context window
  allowed_fails: 3 # cooldown a model if it fails > 3 calls in a minute
  cooldown_time: 30 # (in seconds) how long to cooldown a model if fails/min > allowed_fails
  disable_cooldowns: True # bool - disable cooldowns for all models
  retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
    "AuthenticationErrorRetries": 3,
    "TimeoutErrorRetries": 3,
    "RateLimitErrorRetries": 3,
    "ContentPolicyViolationErrorRetries": 4,
    "InternalServerErrorRetries": 4
  }
  allowed_fails_policy: {
    "BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
    "AuthenticationErrorAllowedFails": 10, # int
    "TimeoutErrorAllowedFails": 12, # int
    "RateLimitErrorAllowedFails": 10000, # int
    "ContentPolicyViolationErrorAllowedFails": 15, # int
    "InternalServerErrorAllowedFails": 20 # int
  }
  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for content policy violations
  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for all errors
```
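
Putting several of these together, a sketch of a Redis-backed setup; the Redis values are placeholders, and the shared Redis instance is what lets usage counts and cooldowns be tracked across multiple proxy instances:

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2
  redis_host: <your-redis-host> # shared across all proxy instances
  redis_password: <your-redis-password>
  redis_port: <your-redis-port>
  enable_pre_call_check: true # skip deployments whose context window the request would exceed
  allowed_fails: 3
  cooldown_time: 30 # seconds a failing deployment is removed from rotation
  fallbacks: [{"claude-2": ["my-fallback-model"]}] # if claude-2 fails, retry on my-fallback-model
```
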
## Extras