(docs) router settings - on litellm config (#6037)

* add yaml with all router settings
* add docs for router settings
* docs for router settings and litellm settings
1 changed file with 86 additions and 64 deletions
Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`master_key`) on the config.yaml.

| Param Name | Description |
|----------------------|---------------------------------------------------------------|
| `model_list` | List of supported models on the server, with model-specific configs |
| `router_settings` | litellm Router settings, example `routing_strategy="least-busy"` [**see all**](#router-settings) |
| `litellm_settings` | litellm Module settings, example `litellm.drop_params=True`, `litellm.set_verbose=True`, `litellm.api_base`, `litellm.cache` [**see all**](#all-settings) |
| `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
| `environment_variables` | Environment variables, example `REDIS_HOST`, `REDIS_PORT` |
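For illustration, here is a minimal sketch of how these sections fit together in a single config.yaml; the model names, key, and Redis values are placeholders, not defaults.

```yaml
# Hypothetical config.yaml combining the sections above (all values are placeholders)
model_list:
  - model_name: gpt-4o                    # model alias exposed by the proxy
    litellm_params:
      model: openai/gpt-4o                # actual model string sent to the provider
      api_key: os.environ/OPENAI_API_KEY  # read the key from an environment variable

router_settings:
  routing_strategy: least-busy

litellm_settings:
  drop_params: true
  set_verbose: true

general_settings:
  master_key: sk-my_special_key           # proxy admin key

environment_variables:
  REDIS_HOST: "localhost"
  REDIS_PORT: "6379"
```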
Further down the config, the prompt-template fields on a `model_list` entry look like this:

```yaml
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```
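For context, a sketch of the full `model_list` entry this prompt-template fragment belongs to, assuming the template fields sit under `litellm_params`; the model alias, model string, and `api_base` are hypothetical placeholders.

```yaml
# Sketch only - model alias, model string, and api_base are placeholders
model_list:
  - model_name: mistral-7b                                    # hypothetical model alias
    litellm_params:                                           # params passed to litellm.completion()
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1   # placeholder model
      api_base: "<your-api-base>"                             # placeholder endpoint
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```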
## **All settings**

```yaml
environment_variables: {}

model_list:
  - model_name: string
    litellm_params: {}
    model_info:
      id: string
      mode: embedding
      input_cost_per_token: 0
      output_cost_per_token: 0
      max_tokens: 2048
      base_model: gpt-4-1106-preview
      additionalProp1: {}

litellm_settings:
  success_callback: ["langfuse"] # list of success callbacks
  failure_callback: ["sentry"] # list of failure callbacks
  callbacks: ["otel"] # list of callbacks - runs on success and failure
  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
  turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks; request metadata will still be logged
  redact_user_api_key_info: boolean # redact information about the user api key (hashed token, user_id, team id, etc.) from logs; currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging

  callback_settings:
    otel:
      message_logging: boolean # OTEL logging callback specific settings

general_settings:
  completion_model: string
  disable_spend_logs: boolean # turn off writing each transaction to the db
  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
  disable_reset_budget: boolean # turn off reset budget scheduled task
  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
  allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
  key_management_system: google_kms # either google_kms or azure_kms
  master_key: string
  database_url: string
  database_connection_pool_limit: 0 # default 100
  database_connection_timeout: 0 # default 60s
  custom_auth: string
  max_parallel_requests: 0 # the max parallel requests allowed per deployment
  global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
  infer_model_from_keys: true
  background_health_checks: true
  health_check_interval: 300
  alerting: ["slack", "email"]
  alerting_threshold: 0
  use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", "/bedrock". When this is True, Virtual Key auth will not be applied on these endpoints (https://docs.litellm.ai/docs/pass_through/vertex_ai)
```
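As a concrete illustration, a proxy that logs successes to Langfuse, keeps message content out of those logs, and runs background health checks might look like the sketch below; the master key, database URL, and interval values are placeholders, not recommendations.

```yaml
# Hypothetical values - adjust for your deployment
litellm_settings:
  success_callback: ["langfuse"]
  turn_off_message_logging: true  # log request metadata only, not message/response content

general_settings:
  master_key: sk-my_special_key
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"
  background_health_checks: true
  health_check_interval: 300      # seconds
  alerting: ["slack"]
```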
### Router Settings

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
  redis_host: <your-redis-host>            # string
  redis_password: <your-redis-password>    # string
  redis_port: <your-redis-port>            # string
  enable_pre_call_check: true              # bool - before a call is made, check that it fits within the model's context window
  allowed_fails: 3                         # cooldown a model if it fails more than `allowed_fails` calls in a minute
  cooldown_time: 30                        # (in seconds) how long to cooldown a model if fails/min > allowed_fails
  disable_cooldowns: True                  # bool - disable cooldowns for all models
  retry_policy: {                          # Dict[str, int]: retry policy for different types of exceptions
    "AuthenticationErrorRetries": 3,
    "TimeoutErrorRetries": 3,
    "RateLimitErrorRetries": 3,
    "ContentPolicyViolationErrorRetries": 4,
    "InternalServerErrorRetries": 4
  }
  allowed_fails_policy: {
    "BadRequestErrorAllowedFails": 1000,   # Allow 1000 BadRequestErrors before cooling down a deployment
    "AuthenticationErrorAllowedFails": 10, # int
    "TimeoutErrorAllowedFails": 12,        # int
    "RateLimitErrorAllowedFails": 10000,   # int
    "ContentPolicyViolationErrorAllowedFails": 15, # int
    "InternalServerErrorAllowedFails": 20  # int
  }
  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for content policy violations
  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for all errors
```
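To make these settings concrete, here is a hedged sketch that load-balances two deployments behind one model alias with usage-based routing backed by Redis, falling back to a second alias on errors; the deployment names, environment variable names, and the fallback model are placeholders.

```yaml
# Sketch only - deployment names, env vars, and the fallback model are hypothetical
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2               # placeholder Azure deployment
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
  - model_name: my-fallback-model            # hypothetical fallback alias
    litellm_params:
      model: gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY

router_settings:
  routing_strategy: usage-based-routing-v2   # needs Redis so usage is shared across proxy instances
  redis_host: <your-redis-host>
  redis_password: <your-redis-password>
  redis_port: <your-redis-port>
  enable_pre_call_check: true
  allowed_fails: 3
  cooldown_time: 30
  fallbacks: [{"gpt-3.5-turbo": ["my-fallback-model"]}]
```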

## Extras