fix(proxy_cli.py): allow user to control db connection pool + timeouts from config

This commit is contained in:
Krrish Dholakia 2024-02-17 19:03:32 -08:00
parent bf572b495a
commit 045d84e167
3 changed files with 29 additions and 11 deletions

View file

@ -538,17 +538,13 @@ model_list: # will route requests to the least busy ollama model
api_base: "http://127.0.0.1:8003" api_base: "http://127.0.0.1:8003"
``` ```
## Max Parallel Requests
To rate limit a user based on the number of parallel requests, e.g.: ## Configure DB Pool Limits + Connection Timeouts
if user's parallel requests > x, send a 429 error
if user's parallel requests <= x, let them use the API freely.
set the max parallel request limit on the config.yaml (note: this expects the user to be passing in an api key).
```yaml ```yaml
general_settings: general_settings:
max_parallel_requests: 100 # max parallel requests for a user = 100 database_connection_pool_limit: 100 # sets connection pool for prisma client to postgres db at 100
database_connection_timeout: 60 # sets a 60s pool timeout (passed to the prisma client as `pool_timeout`) when waiting for a db connection
``` ```
## All settings ## All settings
@ -577,6 +573,8 @@ general_settings:
"key_management_system": "google_kms", # either google_kms or azure_kms "key_management_system": "google_kms", # either google_kms or azure_kms
"master_key": "string", "master_key": "string",
"database_url": "string", "database_url": "string",
"database_connection_pool_limit": 0, # default 100
"database_connection_timeout": 0, # default 60s
"database_type": "dynamo_db", "database_type": "dynamo_db",
"database_args": { "database_args": {
"billing_mode": "PROVISIONED_THROUGHPUT", "billing_mode": "PROVISIONED_THROUGHPUT",

View file

@ -311,6 +311,13 @@ class ConfigGeneralSettings(LiteLLMBase):
None, None,
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key", description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
) )
database_connection_pool_limit: Optional[int] = Field(
100,
description="default connection pool for prisma client connecting to postgres db",
)
database_connection_timeout: Optional[float] = Field(
60, description="default timeout for a connection to the database"
)
database_type: Optional[Literal["dynamo_db"]] = Field( database_type: Optional[Literal["dynamo_db"]] = Field(
None, description="to use dynamodb instead of postgres db" None, description="to use dynamodb instead of postgres db"
) )

View file

@ -409,6 +409,8 @@ def run_server(
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`" "uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
) )
db_connection_pool_limit = 100
db_connection_timeout = 60
if config is not None: if config is not None:
""" """
Allow user to pass in db url via config Allow user to pass in db url via config
@ -427,6 +429,12 @@ def run_server(
proxy_config.load_config(router=None, config_file_path=config) proxy_config.load_config(router=None, config_file_path=config)
) )
database_url = general_settings.get("database_url", None) database_url = general_settings.get("database_url", None)
db_connection_pool_limit = general_settings.get(
"database_connection_pool_limit", 100
)
db_connection_timeout = general_settings.get(
"database_connection_timeout", 60
)
if database_url and database_url.startswith("os.environ/"): if database_url and database_url.startswith("os.environ/"):
original_dir = os.getcwd() original_dir = os.getcwd()
# set the working directory to where this script is # set the working directory to where this script is
@ -447,14 +455,19 @@ def run_server(
try: try:
if os.getenv("DATABASE_URL", None) is not None: if os.getenv("DATABASE_URL", None) is not None:
### add connection pool + pool timeout args ### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60} params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DATABASE_URL") database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params) modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url os.environ["DATABASE_URL"] = modified_url
###
if os.getenv("DIRECT_URL", None) is not None: if os.getenv("DIRECT_URL", None) is not None:
### add connection pool + pool timeout args ### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60} params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DIRECT_URL") database_url = os.getenv("DIRECT_URL")
modified_url = append_query_params(database_url, params) modified_url = append_query_params(database_url, params)
os.environ["DIRECT_URL"] = modified_url os.environ["DIRECT_URL"] = modified_url