(docs + testing) Correctly document that the timeout used by the litellm proxy is 6000 seconds + add to best practices for prod (#6339)

* fix docs to use the documented timeout
* document request_timeout
* add test for litellm.request_timeout
* add test for checking the value of the timeout
parent 64c3d3210c · commit 807e9dcea8
7 changed files with 54 additions and 4 deletions
````diff
@@ -135,7 +135,7 @@ Cli arguments, --host, --port, --num_workers
 ```

 ## --request_timeout
-- **Default:** `600`
+- **Default:** `6000`
 - **Type:** `int`
 - Set the timeout in seconds for completion calls.
 - **Usage:**
````
````diff
@@ -625,6 +625,7 @@ litellm_settings:
   redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
   langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging

+  request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout

   set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
   json_logs: boolean # if true, logs will be in json format
````
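As a sanity check on what this knob does, here is a minimal sketch, assuming only that the proxy copies the configured value onto `litellm.request_timeout` (the model name and prompt are placeholders):

```python
import litellm

# Mirrors `litellm_settings.request_timeout: 10` from the config above.
litellm.request_timeout = 10  # seconds

try:
    litellm.completion(
        model="gpt-3.5-turbo",  # placeholder model for illustration
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.Timeout:
    # litellm raises Timeout once a call runs longer than request_timeout.
    print("call exceeded the 10s request_timeout")
```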
```diff
@@ -721,6 +722,7 @@ general_settings:
 | set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
 | json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
 | default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
+| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
 | content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
 | context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
 | cache | boolean | If true, enables caching. [Further docs](./caching) |
```
````diff
@@ -21,6 +21,7 @@ general_settings:
   database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number)

 litellm_settings:
+  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000 seconds if not set
   set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
   json_logs: true # Get debug logs in json format
 ```
````
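Note that `request_timeout: 600` in this production example aligns the proxy with the OpenAI Python SDK's 600-second default (see the settings table above), rather than leaving the much larger 6000-second fallback in place.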
````diff
@@ -1312,7 +1312,7 @@ LiteLLM proxy adds **0.00325 seconds** latency as compared to using the Raw Open
 ```

 #### --request_timeout
-- **Default:** `600`
+- **Default:** `6000`
 - **Type:** `int`
 - Set the timeout in seconds for completion calls.
 - **Usage:**
````
```diff
@@ -372,6 +372,11 @@ async def _db_health_readiness_check():
     return db_health_cache


+@router.get(
+    "/settings",
+    tags=["health"],
+    dependencies=[Depends(user_api_key_auth)],
+)
 @router.get(
     "/active/callbacks",
     tags=["health"],
```
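Stacking a second `@router.get` decorator on the same function is standard FastAPI usage: the existing `active_callbacks` handler is now also served at `/settings`, behind the same `user_api_key_auth` dependency.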
````diff
@@ -379,8 +384,29 @@ async def _db_health_readiness_check():
 )
 async def active_callbacks():
     """
-    Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
+    Returns a list of litellm level settings
+
+    This is useful for debugging and ensuring the proxy server is configured correctly.
+
+    Response schema:
+    ```
+    {
+        "alerting": _alerting,
+        "litellm.callbacks": litellm_callbacks,
+        "litellm.input_callback": litellm_input_callbacks,
+        "litellm.failure_callback": litellm_failure_callbacks,
+        "litellm.success_callback": litellm_success_callbacks,
+        "litellm._async_success_callback": litellm_async_success_callbacks,
+        "litellm._async_failure_callback": litellm_async_failure_callbacks,
+        "litellm._async_input_callback": litellm_async_input_callbacks,
+        "all_litellm_callbacks": all_litellm_callbacks,
+        "num_callbacks": len(all_litellm_callbacks),
+        "num_alerting": _num_alerting,
+        "litellm.request_timeout": litellm.request_timeout,
+    }
+    ```
     """

     from litellm.proxy.proxy_server import general_settings, proxy_logging_obj

     _alerting = str(general_settings.get("alerting"))
````
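A quick client-side check of the new endpoint, sketched assuming a proxy running locally on the default port with a placeholder key:

```python
import requests  # used here only for illustration

# Assumes the proxy is running locally, e.g. `litellm --config config.yaml`.
resp = requests.get(
    "http://0.0.0.0:4000/settings",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder proxy key
)
resp.raise_for_status()

# The payload includes the callback lists plus the configured timeout.
print(resp.json()["litellm.request_timeout"])
```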
```diff
@@ -421,6 +447,7 @@ async def active_callbacks():
         "all_litellm_callbacks": all_litellm_callbacks,
         "num_callbacks": len(all_litellm_callbacks),
         "num_alerting": _num_alerting,
+        "litellm.request_timeout": litellm.request_timeout,
     }

```
```diff
@@ -125,7 +125,7 @@ def is_port_in_use(port):
 )
 @click.option(
     "--request_timeout",
-    default=600,
+    default=6000,
     type=int,
     help="Set timeout in seconds for completion calls",
 )
```
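With this change, launching the proxy without the flag is equivalent to passing `litellm --request_timeout 6000` explicitly, so the CLI default now matches the value documented above.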
```diff
@@ -173,6 +173,26 @@ def test_chat_completion(mock_acompletion, client_no_auth):
         pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")


+def test_get_settings_request_timeout(client_no_auth):
+    """
+    When no timeout is set, it should use the litellm.request_timeout value
+    """
+    # Set a known value for litellm.request_timeout
+    import litellm
+
+    # Make a GET request to /settings
+    response = client_no_auth.get("/settings")
+
+    # Check if the request was successful
+    assert response.status_code == 200
+
+    # Parse the JSON response
+    settings = response.json()
+    print("settings", settings)
+
+    assert settings["litellm.request_timeout"] == litellm.request_timeout
+
+
 @pytest.mark.parametrize(
     "litellm_key_header_name",
     ["x-litellm-key", None],
```
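The new test can be run in isolation with `pytest -k test_get_settings_request_timeout`; it reuses the existing `client_no_auth` fixture from the surrounding tests, so no running proxy or API key is required.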