docs(config.md): adding docs on parallel request rate limiting

Krrish Dholakia 2023-12-07 11:27:37 -08:00
parent 077f6b1298
commit d77e0cc716
6 changed files with 14004 additions and 385 deletions


@@ -12,6 +12,9 @@ Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`m
| `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
| `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |
**Complete List:** Check the Swagger UI docs on `<your-proxy-url>/#/config.yaml` (e.g. http://0.0.0.0:8000/#/config.yaml) for everything you can pass in the config.yaml.
## Quick Start

Set a model alias for your deployments.
@@ -301,4 +304,18 @@ model_list:

```shell
$ litellm --config /path/to/config.yaml
```
## Max Parallel Requests

To rate-limit a user based on their number of parallel requests, e.g.:

- if a user's parallel requests > x, return a 429 error
- if a user's parallel requests <= x, let them use the API freely

set the max parallel request limit in the config.yaml (note: this expects the user to be passing in an api key). A sketch of this behavior follows the example below.
```yaml
general_settings:
max_parallel_requests: 100 # max parallel requests for a user = 100
```
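Conceptually, the proxy tracks how many requests are in flight per api key and rejects new ones over the limit. Below is a minimal sketch of that behavior, assuming an in-memory per-key counter; the class and method names are hypothetical, not LiteLLM's actual implementation:

```python
# Hypothetical sketch of per-api-key parallel request limiting --
# illustrates the 429 semantics, not LiteLLM's actual implementation.
from fastapi import HTTPException


class ParallelRequestLimiter:
    def __init__(self, max_parallel_requests: int):
        self.max_parallel_requests = max_parallel_requests
        self.active: dict = {}  # api_key -> number of in-flight requests

    def enter(self, api_key: str):
        # user's parallel requests > x -> send a 429 error
        if self.active.get(api_key, 0) >= self.max_parallel_requests:
            raise HTTPException(
                status_code=429, detail="Max parallel request limit reached"
            )
        self.active[api_key] = self.active.get(api_key, 0) + 1

    def exit(self, api_key: str):
        # call when a request finishes, whether it succeeded or failed
        self.active[api_key] = max(self.active.get(api_key, 1) - 1, 0)
```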

File diff suppressed because it is too large


@@ -20,6 +20,7 @@
    "@docusaurus/preset-classic": "2.4.1",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
    "docusaurus": "^1.14.7",
    "docusaurus-lunr-search": "^2.4.1",
    "prism-react-renderer": "^1.3.5",
    "react": "^17.0.2",

File diff suppressed because it is too large


@@ -1,4 +1,4 @@
from pydantic import BaseModel, Extra, Field
from typing import Optional, List, Union, Dict, Literal
from datetime import datetime
import uuid
@@ -38,6 +38,10 @@ class ProxyChatCompletionRequest(BaseModel):
    class Config:
        extra='allow' # allow params not defined here, these fall in litellm.completion(**kwargs)

class ModelInfoDelete(BaseModel):
    id: Optional[str]

class ModelInfo(BaseModel):
    id: Optional[str]
    mode: Optional[Literal['embedding', 'chat', 'completion']]
@@ -62,8 +66,6 @@ class ModelInfo(BaseModel):
        extra = Extra.allow # Allow extra fields
        protected_namespaces = ()

class ModelInfoDelete(BaseModel):
    id: Optional[str]

class ModelParams(BaseModel):
    model_name: str
@@ -95,4 +97,25 @@ class DeleteKeyRequest(BaseModel):
class UserAPIKeyAuth(BaseModel): # the expected response object for user api key auth
    api_key: Optional[str] = None
    user_id: Optional[str] = None
class ConfigGeneralSettings(BaseModel):
    """
    Documents all the fields supported by `general_settings` in config.yaml
    """
    completion_model: Optional[str] = Field(None, description="proxy level default model for all chat completion calls")
    use_azure_key_vault: Optional[bool] = Field(None, description="load keys from azure key vault")
    master_key: Optional[str] = Field(None, description="require a key for all calls to proxy")
    database_url: Optional[str] = Field(None, description="connect to a postgres db - needed for generating temporary keys + tracking spend / key")
    otel: Optional[bool] = Field(None, description="[BETA] OpenTelemetry support - this might change, use with caution.")
    custom_auth: Optional[str] = Field(None, description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth")
    max_parallel_requests: Optional[int] = Field(None, description="maximum parallel requests for each api key")
    infer_model_from_keys: Optional[bool] = Field(None, description="for `/models` endpoint, infers available model based on environment keys (e.g. OPENAI_API_KEY)")

class ConfigYAML(BaseModel):
    """
    Documents all the fields supported by the config.yaml
    """
    model_list: Optional[List[ModelParams]] = Field(None, description="List of supported models on the server, with model-specific configs")
    litellm_settings: Optional[dict] = Field(None, description="litellm Module settings. See __init__.py for all, example litellm.drop_params=True, litellm.set_verbose=True, litellm.api_base, litellm.cache")
    general_settings: Optional[ConfigGeneralSettings] = None
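For illustration (not part of this commit), a config.yaml could be validated against these models roughly like so, assuming PyYAML is available and the classes above are importable; the file path is a placeholder:

```python
# Illustrative only: parse a config.yaml and validate it against ConfigYAML.
import yaml

with open("config.yaml") as f:
    raw = yaml.safe_load(f)

# pydantic raises a ValidationError if a field has the wrong type
config = ConfigYAML(**raw)

if config.general_settings is not None:
    print(config.general_settings.max_parallel_requests)
```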


@@ -1297,6 +1297,27 @@ async def retrieve_server_log(request: Request):
#### BASIC ENDPOINTS ####
@router.get("/config/yaml", tags=["config.yaml"])
async def config_yaml_endpoint(config_info: ConfigYAML):
"""
This is a mock endpoint, to show what you can set in config.yaml details in the Swagger UI.
Parameters:
The config.yaml object has the following attributes:
- **model_list**: *Optional[List[ModelParams]]* - A list of supported models on the server, along with model-specific configurations. ModelParams includes "model_name" (name of the model), "litellm_params" (litellm-specific parameters for the model), and "model_info" (additional info about the model such as id, mode, cost per token, etc).
- **litellm_settings**: *Optional[dict]*: Settings for the litellm module. You can specify multiple properties like "drop_params", "set_verbose", "api_base", "cache".
- **general_settings**: *Optional[ConfigGeneralSettings]*: General settings for the server like "completion_model" (default model for chat completion calls), "use_azure_key_vault" (option to load keys from azure key vault), "master_key" (key required for all calls to proxy), and others.
Please, refer to each class's description for a better understanding of the specific attributes within them.
Note: This is a mock endpoint primarily meant for demonstration purposes, and does not actually provide or change any configurations.
"""
return {"hello": "world"}
@router.get("/test") @router.get("/test")
async def test_endpoint(request: Request): async def test_endpoint(request: Request):
return {"route": request.url.path} return {"route": request.url.path}