forked from phoenix/litellm-mirror
docs(config.md): adding docs on parallel request rate limiting
This commit is contained in:
parent 077f6b1298
commit d77e0cc716
6 changed files with 14004 additions and 385 deletions
@@ -12,6 +12,9 @@ Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`m
 | `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
 | `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |
 
+**Complete List:** Check the Swagger UI docs on `<your-proxy-url>/#/config.yaml` (e.g. http://0.0.0.0:8000/#/config.yaml) for everything you can pass in the config.yaml.
+
 ## Quick Start
 
 Set a model alias for your deployments.
@@ -301,4 +304,18 @@ model_list:
 ```shell
 $ litellm --config /path/to/config.yaml
 ```
 
+
+## Max Parallel Requests
+
+To rate limit a user based on the number of parallel requests they can make, e.g.:
+
+- if a user's parallel requests > x, send a 429 error
+- if a user's parallel requests <= x, let them use the API freely
+
+Set the max parallel request limit in the config.yaml (note: this expects the user to be passing in an API key).
+
+```yaml
+general_settings:
+  max_parallel_requests: 100 # max parallel requests for a user = 100
+```
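As a rough illustration of the behavior described above (a minimal sketch, not part of this commit): fire more requests in parallel than `max_parallel_requests` allows and count the status codes. This assumes the proxy runs at http://0.0.0.0:8000 with the config above, and `sk-my-test-key` is a hypothetical generated key.

```python
# Sketch only: count status codes when exceeding max_parallel_requests.
# The proxy URL and "sk-my-test-key" are assumptions, not from this commit.
import concurrent.futures

import requests

PROXY_BASE = "http://0.0.0.0:8000"  # assumed proxy URL
API_KEY = "sk-my-test-key"          # hypothetical generated key


def send_request(i: int) -> int:
    response = requests.post(
        f"{PROXY_BASE}/chat/completions",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": f"request {i}"}],
        },
    )
    return response.status_code


# 150 concurrent requests against a limit of 100: per the rule above,
# some should come back 429 while the rest succeed with 200
with concurrent.futures.ThreadPoolExecutor(max_workers=150) as pool:
    codes = list(pool.map(send_request, range(150)))

print({code: codes.count(code) for code in set(codes)})
```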
docs/my-website/package-lock.json (generated, 8668 changes)
File diff suppressed because it is too large.
@@ -20,6 +20,7 @@
     "@docusaurus/preset-classic": "2.4.1",
     "@mdx-js/react": "^1.6.22",
     "clsx": "^1.2.1",
+    "docusaurus": "^1.14.7",
     "docusaurus-lunr-search": "^2.4.1",
     "prism-react-renderer": "^1.3.5",
     "react": "^17.0.2",

File diff suppressed because it is too large.
@ -1,4 +1,4 @@
|
||||||
from pydantic import BaseModel, Extra
|
from pydantic import BaseModel, Extra, Field
|
||||||
from typing import Optional, List, Union, Dict, Literal
|
from typing import Optional, List, Union, Dict, Literal
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import uuid
|
import uuid
|
||||||
|
@ -38,6 +38,10 @@ class ProxyChatCompletionRequest(BaseModel):
|
||||||
class Config:
|
class Config:
|
||||||
extra='allow' # allow params not defined here, these fall in litellm.completion(**kwargs)
|
extra='allow' # allow params not defined here, these fall in litellm.completion(**kwargs)
|
||||||
|
|
||||||
|
class ModelInfoDelete(BaseModel):
|
||||||
|
id: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
class ModelInfo(BaseModel):
|
class ModelInfo(BaseModel):
|
||||||
id: Optional[str]
|
id: Optional[str]
|
||||||
mode: Optional[Literal['embedding', 'chat', 'completion']]
|
mode: Optional[Literal['embedding', 'chat', 'completion']]
|
||||||
|
@ -62,8 +66,6 @@ class ModelInfo(BaseModel):
|
||||||
extra = Extra.allow # Allow extra fields
|
extra = Extra.allow # Allow extra fields
|
||||||
protected_namespaces = ()
|
protected_namespaces = ()
|
||||||
|
|
||||||
class ModelInfoDelete(BaseModel):
|
|
||||||
id: Optional[str]
|
|
||||||
|
|
||||||
class ModelParams(BaseModel):
|
class ModelParams(BaseModel):
|
||||||
model_name: str
|
model_name: str
|
||||||
|
@ -95,4 +97,25 @@ class DeleteKeyRequest(BaseModel):
|
||||||
|
|
||||||
class UserAPIKeyAuth(BaseModel): # the expected response object for user api key auth
|
class UserAPIKeyAuth(BaseModel): # the expected response object for user api key auth
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
|
|
||||||
|
class ConfigGeneralSettings(BaseModel):
|
||||||
|
"""
|
||||||
|
Documents all the fields supported by `general_settings` in config.yaml
|
||||||
|
"""
|
||||||
|
completion_model: Optional[str] = Field(None, description="proxy level default model for all chat completion calls")
|
||||||
|
use_azure_key_vault: Optional[bool] = Field(None, description="load keys from azure key vault")
|
||||||
|
master_key: Optional[str] = Field(None, description="require a key for all calls to proxy")
|
||||||
|
database_url: Optional[str] = Field(None, description="connect to a postgres db - needed for generating temporary keys + tracking spend / key")
|
||||||
|
otel: Optional[bool] = Field(None, description="[BETA] OpenTelemetry support - this might change, use with caution.")
|
||||||
|
custom_auth: Optional[str] = Field(None, description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth")
|
||||||
|
max_parallel_requests: Optional[int] = Field(None, description="maximum parallel requests for each api key")
|
||||||
|
infer_model_from_keys: Optional[bool] = Field(None, description="for `/models` endpoint, infers available model based on environment keys (e.g. OPENAI_API_KEY)")
|
||||||
|
|
||||||
|
class ConfigYAML(BaseModel):
|
||||||
|
"""
|
||||||
|
Documents all the fields supported by the config.yaml
|
||||||
|
"""
|
||||||
|
model_list: Optional[List[ModelParams]] = Field(None, description="List of supported models on the server, with model-specific configs")
|
||||||
|
litellm_settings: Optional[dict] = Field(None, description="litellm Module settings. See __init__.py for all, example litellm.drop_params=True, litellm.set_verbose=True, litellm.api_base, litellm.cache")
|
||||||
|
general_settings: Optional[ConfigGeneralSettings] = None
|
@@ -1297,6 +1297,27 @@ async def retrieve_server_log(request: Request):
 
 #### BASIC ENDPOINTS ####
 
+@router.get("/config/yaml", tags=["config.yaml"])
+async def config_yaml_endpoint(config_info: ConfigYAML):
+    """
+    This is a mock endpoint, to show what you can set in config.yaml details in the Swagger UI.
+
+    Parameters:
+
+    The config.yaml object has the following attributes:
+    - **model_list**: *Optional[List[ModelParams]]* - A list of supported models on the server, along with model-specific configurations. ModelParams includes "model_name" (name of the model), "litellm_params" (litellm-specific parameters for the model), and "model_info" (additional info about the model such as id, mode, cost per token, etc).
+
+    - **litellm_settings**: *Optional[dict]* - Settings for the litellm module. You can specify multiple properties like "drop_params", "set_verbose", "api_base", "cache".
+
+    - **general_settings**: *Optional[ConfigGeneralSettings]* - General settings for the server, like "completion_model" (default model for chat completion calls), "use_azure_key_vault" (option to load keys from azure key vault), "master_key" (key required for all calls to proxy), and others.
+
+    Please refer to each class's description for a better understanding of the specific attributes within them.
+
+    Note: This is a mock endpoint primarily meant for demonstration purposes; it does not actually provide or change any configurations.
+    """
+    return {"hello": "world"}
+
+
 @router.get("/test")
 async def test_endpoint(request: Request):
     return {"route": request.url.path}