docs(config.md): adding docs on parallel request rate limiting

Krrish Dholakia 2023-12-07 11:27:37 -08:00
parent 077f6b1298
commit d77e0cc716
6 changed files with 14004 additions and 385 deletions


@@ -12,6 +12,9 @@ Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`m
| `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
| `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |
**Complete List:** Check the Swagger UI docs on `<your-proxy-url>/#/config.yaml` (e.g. http://0.0.0.0:8000/#/config.yaml) for everything you can pass in the config.yaml.
## Quick Start

Set a model alias for your deployments.
@@ -301,4 +304,18 @@ model_list:

```shell
$ litellm --config /path/to/config.yaml
```
## Max Parallel Requests

To rate-limit a user based on their number of parallel requests, e.g.:

- if a user's parallel requests > x, return a 429 error
- if a user's parallel requests <= x, let them use the API freely

set the max parallel request limit in the config.yaml (note: this expects the user to be passing in an api key). A sketch of this behavior follows the example below.
```yaml
general_settings:
max_parallel_requests: 100 # max parallel requests for a user = 100
```
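Conceptually, the proxy tracks how many requests are in flight per api key and rejects new ones over the limit. Below is a minimal sketch of that behavior, assuming an in-memory per-key counter; the class and method names are hypothetical, not LiteLLM's actual implementation:

```python
# Hypothetical sketch of per-api-key parallel request limiting --
# illustrates the 429 semantics, not LiteLLM's actual implementation.
from fastapi import HTTPException


class ParallelRequestLimiter:
    def __init__(self, max_parallel_requests: int):
        self.max_parallel_requests = max_parallel_requests
        self.active: dict = {}  # api_key -> number of in-flight requests

    def enter(self, api_key: str):
        # user's parallel requests > x -> send a 429 error
        if self.active.get(api_key, 0) >= self.max_parallel_requests:
            raise HTTPException(
                status_code=429, detail="Max parallel request limit reached"
            )
        self.active[api_key] = self.active.get(api_key, 0) + 1

    def exit(self, api_key: str):
        # call when a request finishes, whether it succeeded or failed
        self.active[api_key] = max(self.active.get(api_key, 1) - 1, 0)
```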

File diff suppressed because it is too large


@@ -20,6 +20,7 @@
    "@docusaurus/preset-classic": "2.4.1",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
    "docusaurus": "^1.14.7",
    "docusaurus-lunr-search": "^2.4.1",
    "prism-react-renderer": "^1.3.5",
    "react": "^17.0.2",

File diff suppressed because it is too large


@@ -1,4 +1,4 @@
from pydantic import BaseModel, Extra, Field
from typing import Optional, List, Union, Dict, Literal
from datetime import datetime
import uuid
@@ -38,6 +38,10 @@ class ProxyChatCompletionRequest(BaseModel):
    class Config:
        extra='allow' # allow params not defined here, these fall in litellm.completion(**kwargs)

class ModelInfoDelete(BaseModel):
    id: Optional[str]

class ModelInfo(BaseModel):
    id: Optional[str]
    mode: Optional[Literal['embedding', 'chat', 'completion']]
@@ -62,8 +66,6 @@ class ModelInfo(BaseModel):
        extra = Extra.allow # Allow extra fields
        protected_namespaces = ()

class ModelInfoDelete(BaseModel):
    id: Optional[str]

class ModelParams(BaseModel):
    model_name: str
@@ -95,4 +97,25 @@ class DeleteKeyRequest(BaseModel):
class UserAPIKeyAuth(BaseModel): # the expected response object for user api key auth
    api_key: Optional[str] = None
    user_id: Optional[str] = None
class ConfigGeneralSettings(BaseModel):
    """
    Documents all the fields supported by `general_settings` in config.yaml
    """
    completion_model: Optional[str] = Field(None, description="proxy level default model for all chat completion calls")
    use_azure_key_vault: Optional[bool] = Field(None, description="load keys from azure key vault")
    master_key: Optional[str] = Field(None, description="require a key for all calls to proxy")
    database_url: Optional[str] = Field(None, description="connect to a postgres db - needed for generating temporary keys + tracking spend / key")
    otel: Optional[bool] = Field(None, description="[BETA] OpenTelemetry support - this might change, use with caution.")
    custom_auth: Optional[str] = Field(None, description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth")
    max_parallel_requests: Optional[int] = Field(None, description="maximum parallel requests for each api key")
    infer_model_from_keys: Optional[bool] = Field(None, description="for `/models` endpoint, infers available model based on environment keys (e.g. OPENAI_API_KEY)")

class ConfigYAML(BaseModel):
    """
    Documents all the fields supported by the config.yaml
    """
    model_list: Optional[List[ModelParams]] = Field(None, description="List of supported models on the server, with model-specific configs")
    litellm_settings: Optional[dict] = Field(None, description="litellm Module settings. See __init__.py for all, example litellm.drop_params=True, litellm.set_verbose=True, litellm.api_base, litellm.cache")
    general_settings: Optional[ConfigGeneralSettings] = None
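For illustration (not part of this commit), a config.yaml could be validated against these models roughly like so, assuming PyYAML is available and the classes above are importable; the file path is a placeholder:

```python
# Illustrative only: parse a config.yaml and validate it against ConfigYAML.
import yaml

with open("config.yaml") as f:
    raw = yaml.safe_load(f)

# pydantic raises a ValidationError if a field has the wrong type
config = ConfigYAML(**raw)

if config.general_settings is not None:
    print(config.general_settings.max_parallel_requests)
```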


@@ -1297,6 +1297,27 @@ async def retrieve_server_log(request: Request):
#### BASIC ENDPOINTS ####
@router.get("/config/yaml", tags=["config.yaml"])
async def config_yaml_endpoint(config_info: ConfigYAML):
"""
This is a mock endpoint, to show what you can set in config.yaml details in the Swagger UI.
Parameters:
The config.yaml object has the following attributes:
- **model_list**: *Optional[List[ModelParams]]* - A list of supported models on the server, along with model-specific configurations. ModelParams includes "model_name" (name of the model), "litellm_params" (litellm-specific parameters for the model), and "model_info" (additional info about the model such as id, mode, cost per token, etc).
- **litellm_settings**: *Optional[dict]*: Settings for the litellm module. You can specify multiple properties like "drop_params", "set_verbose", "api_base", "cache".
- **general_settings**: *Optional[ConfigGeneralSettings]*: General settings for the server like "completion_model" (default model for chat completion calls), "use_azure_key_vault" (option to load keys from azure key vault), "master_key" (key required for all calls to proxy), and others.
Please, refer to each class's description for a better understanding of the specific attributes within them.
Note: This is a mock endpoint primarily meant for demonstration purposes, and does not actually provide or change any configurations.
"""
return {"hello": "world"}
@router.get("/test") @router.get("/test")
async def test_endpoint(request: Request): async def test_endpoint(request: Request):
return {"route": request.url.path} return {"route": request.url.path}