diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md
index 762a9f6df..7bed6c1d8 100644
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -8,8 +8,8 @@ Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`m
 | Param Name | Description |
 |----------------------|---------------------------------------------------------------|
 | `model_list` | List of supported models on the server, with model-specific configs |
-| `router_settings` | litellm Router settings, example `routing_strategy="least-busy"` [**see all**](https://github.com/BerriAI/litellm/blob/6ef0e8485e0e720c0efa6f3075ce8119f2f62eea/litellm/router.py#L64)|
-| `litellm_settings` | litellm Module settings, example `litellm.drop_params=True`, `litellm.set_verbose=True`, `litellm.api_base`, `litellm.cache` [**see all**](https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py)|
+| `router_settings` | litellm Router settings, example `routing_strategy="least-busy"` [**see all**](#router-settings)|
+| `litellm_settings` | litellm Module settings, example `litellm.drop_params=True`, `litellm.set_verbose=True`, `litellm.api_base`, `litellm.cache` [**see all**](#all-settings)|
 | `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
 | `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |
@@ -559,8 +559,8 @@ model_list:
       initial_prompt_value: "\n"
       roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
       final_prompt_value: "\n"
-      bos_token: ""
-      eos_token: ""
+      bos_token: " "
+      eos_token: " "
       max_tokens: 4096
 ```
@@ -582,70 +582,92 @@ general_settings:
 ## **All settings**

-```python
-{
-  "environment_variables": {},
-  "model_list": [
-    {
-      "model_name": "string",
-      "litellm_params": {},
-      "model_info": {
-        "id": "string",
-        "mode": "embedding",
-        "input_cost_per_token": 0,
-        "output_cost_per_token": 0,
-        "max_tokens": 2048,
-        "base_model": "gpt-4-1106-preview",
-        "additionalProp1": {}
-      }
-    }
-  ],
-  "litellm_settings": {
-    "success_callback": "List[str]" # list of success callbacks - eg ["langfuse"]
-    "failure_callback": "List[str]" # list of failure callbacks - eg ["sentry"]
-    "callbacks": "List[str] or str" # list of callbacks - runs on success and failure - eg ["otel"]
-    "service_callbacks": "List[str]" # logs redis, postgres failures on datadog, prometheus
-    "turn_off_message_logging": "boolean" # prevent the messages and responses from being logged to on your callbacks, but request metadata will still be logged.
-    "redact_user_api_key_info": "boolean" # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
+```yaml
+environment_variables: {}

-  }, # ALL (https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py)
-  "callback_settings": {
-    "otel": { # OTEL logging callback specific settings
-      "message_logging": "boolean"
-    }
-  },
-  "general_settings": {
-    "completion_model": "string",
-    "disable_spend_logs": "boolean", # turn off writing each transaction to the db
-    "disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint)
-    "disable_retry_on_max_parallel_request_limit_error": "boolean", # turn off retries when max parallel request limit is reached
-    "disable_reset_budget": "boolean", # turn off reset budget scheduled task
-    "disable_adding_master_key_hash_to_db": "boolean", # turn off storing master key hash in db, for spend tracking
-    "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
-    "enforce_user_param": "boolean", # requires all openai endpoint requests to have a 'user' param
-    "allowed_routes": "list", # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
-    "key_management_system": "google_kms", # either google_kms or azure_kms
-    "master_key": "string",
-    "database_url": "string",
-    "database_connection_pool_limit": 0, # default 100
-    "database_connection_timeout": 0, # default 60s
-    "otel": true,
-    "custom_auth": "string",
-    "max_parallel_requests": 0, # the max parallel requests allowed per deployment
-    "global_max_parallel_requests": 0, # the max parallel requests allowed on the proxy all up
-    "infer_model_from_keys": true,
-    "background_health_checks": true,
-    "health_check_interval": 300,
-    "alerting": [
-      "string"
-    ],
-    "alerting_threshold": 0,
-    "use_client_credentials_pass_through_routes" : "boolean", # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints" https://docs.litellm.ai/docs/pass_through/vertex_ai
-  }
-}
+model_list:
+  - model_name: string
+    litellm_params: {}
+    model_info:
+      id: string
+      mode: embedding
+      input_cost_per_token: 0
+      output_cost_per_token: 0
+      max_tokens: 2048
+      base_model: gpt-4-1106-preview
+      additionalProp1: {}
+
+litellm_settings:
+  success_callback: ["langfuse"] # list of success callbacks
+  failure_callback: ["sentry"] # list of failure callbacks
+  callbacks: ["otel"] # list of callbacks - runs on success and failure
+  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
+  turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks, but request metadata will still be logged.
+  redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
+
+callback_settings:
+  otel:
+    message_logging: boolean # OTEL logging callback specific settings
+
+general_settings:
+  completion_model: string
+  disable_spend_logs: boolean # turn off writing each transaction to the db
+  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
+  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
+  disable_reset_budget: boolean # turn off reset budget scheduled task
+  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
+  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
+  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
+  allowed_routes: ["route1", "route2"] # list of proxy API routes a user can access (currently JWT-Auth only)
+  key_management_system: google_kms # either google_kms or azure_kms
+  master_key: string
+  database_url: string
+  database_connection_pool_limit: 0 # default 100
+  database_connection_timeout: 0 # default 60s
+  custom_auth: string
+  max_parallel_requests: 0 # the max parallel requests allowed per deployment
+  global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
+  infer_model_from_keys: true
+  background_health_checks: true
+  health_check_interval: 300
+  alerting: ["slack", "email"]
+  alerting_threshold: 0
+  use_client_credentials_pass_through_routes: boolean # use client credentials for all pass-through routes like "/vertex-ai", "/bedrock". When this is True, Virtual Key auth will not be applied on these endpoints
 ```
+
+### Router Settings
+
+```yaml
+router_settings:
+  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "usage-based-routing-v2", "latency-based-routing"], default="simple-shuffle"
+  redis_host: # string
+  redis_password: # string
+  redis_port: # string
+  enable_pre_call_checks: true # bool - Before the call is made, check if the request fits within the model's context window
+  allowed_fails: 3 # cooldown model if it fails more than allowed_fails calls in a minute
+  cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
+  disable_cooldowns: True # bool - Disable cooldowns for all models
+  retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
+    "AuthenticationErrorRetries": 3,
+    "TimeoutErrorRetries": 3,
+    "RateLimitErrorRetries": 3,
+    "ContentPolicyViolationErrorRetries": 4,
+    "InternalServerErrorRetries": 4
+  }
+  allowed_fails_policy: {
+    "BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
+    "AuthenticationErrorAllowedFails": 10, # int
+    "TimeoutErrorAllowedFails": 12, # int
+    "RateLimitErrorAllowedFails": 10000, # int
+    "ContentPolicyViolationErrorAllowedFails": 15, # int
+    "InternalServerErrorAllowedFails": 20, # int
+  }
+  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
+  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
+```
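+
+For reference, here is a minimal end-to-end `config.yaml` sketch that combines the sections above. The model names, environment variable names, and master key below are placeholders, not defaults:
+
+```yaml
+model_list:
+  - model_name: gpt-4o                      # alias callers send in the `model` param
+    litellm_params:
+      model: openai/gpt-4o                  # actual provider/model
+      api_key: os.environ/OPENAI_API_KEY    # read from the environment at startup
+  - model_name: gpt-4o                      # same alias = second deployment to load-balance across
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY_2
+
+litellm_settings:
+  drop_params: true                         # drop provider-unsupported params instead of erroring
+
+router_settings:
+  routing_strategy: simple-shuffle          # pick any strategy listed above
+  num_retries: 2
+
+general_settings:
+  master_key: sk-1234                       # placeholder - set your own proxy admin key
+```
+
+Start the proxy against this file with `litellm --config /path/to/config.yaml`; requests that set `model: gpt-4o` are then routed across the two deployments according to `routing_strategy`.
+
+
 ## Extras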