From 807e9dcea8c71fd6f0a791e152f43b3d1cd4de1b Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 23 Oct 2024 14:09:35 +0530
Subject: [PATCH] (docs + testing) Correctly document that the timeout value
 used by litellm proxy is 6000 seconds + add to best practices for prod
 (#6339)

* fix docs to use documented timeout

* document request timeout

* add test for litellm.request_timeout

* add test for checking value of timeout
---
 docs/my-website/docs/proxy/cli.md            |  2 +-
 docs/my-website/docs/proxy/configs.md        |  2 ++
 docs/my-website/docs/proxy/prod.md           |  1 +
 docs/my-website/docs/simple_proxy_old_doc.md |  2 +-
 .../health_endpoints/_health_endpoints.py    | 29 ++++++++++++++++++-
 litellm/proxy/proxy_cli.py                   |  2 +-
 tests/local_testing/test_proxy_server.py     | 20 +++++++++++++
 7 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/proxy/cli.md b/docs/my-website/docs/proxy/cli.md
index 5ce7a05ec..d0c477a4e 100644
--- a/docs/my-website/docs/proxy/cli.md
+++ b/docs/my-website/docs/proxy/cli.md
@@ -135,7 +135,7 @@ Cli arguments, --host, --port, --num_workers
 ```
 
 ## --request_timeout
-   - **Default:** `600`
+   - **Default:** `6000`
    - **Type:** `int`
    - Set the timeout in seconds for completion calls.
    - **Usage:**
diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md
index aa79242d4..bf16a96e6 100644
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -625,6 +625,7 @@ litellm_settings:
   redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
   langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
 
+  request_timeout: 10 # (int) llm request timeout in seconds. Raises a Timeout error if a call takes longer than 10s. Sets litellm.request_timeout
   set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
   json_logs: boolean # if true, logs will be in json format
 
@@ -721,6 +722,7 @@ general_settings:
 | set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
 | json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
 | default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
+| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000` seconds. [For reference, the OpenAI Python SDK defaults to `600` seconds.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
 | content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
 | context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
 | cache | boolean | If true, enables caching. [Further docs](./caching) |
diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md
index c42b07d8e..99fa19e77 100644
--- a/docs/my-website/docs/proxy/prod.md
+++ b/docs/my-website/docs/proxy/prod.md
@@ -21,6 +21,7 @@ general_settings:
   database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number)
 
 litellm_settings:
+  request_timeout: 600 # raise a Timeout error if a call takes longer than 600 seconds. Default is 6000 seconds if not set
   set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
   json_logs: true # Get debug logs in json format
 ```
diff --git a/docs/my-website/docs/simple_proxy_old_doc.md b/docs/my-website/docs/simple_proxy_old_doc.md
index 2d68db329..64491b1ea 100644
--- a/docs/my-website/docs/simple_proxy_old_doc.md
+++ b/docs/my-website/docs/simple_proxy_old_doc.md
@@ -1312,7 +1312,7 @@ LiteLLM proxy adds **0.00325 seconds** latency as compared to using the Raw Open
 ```
 
 #### --request_timeout
-   - **Default:** `600`
+   - **Default:** `6000`
    - **Type:** `int`
    - Set the timeout in seconds for completion calls.
    - **Usage:**
diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py
index 78b2a3d20..1f2eb5d6d 100644
--- a/litellm/proxy/health_endpoints/_health_endpoints.py
+++ b/litellm/proxy/health_endpoints/_health_endpoints.py
@@ -372,6 +372,11 @@ async def _db_health_readiness_check():
     return db_health_cache
 
 
+@router.get(
+    "/settings",
+    tags=["health"],
+    dependencies=[Depends(user_api_key_auth)],
+)
 @router.get(
     "/active/callbacks",
     tags=["health"],
@@ -379,8 +384,29 @@ async def _db_health_readiness_check():
 )
 async def active_callbacks():
     """
-    Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
+    Returns litellm-level settings
+
+    This is useful for debugging and ensuring the proxy server is configured correctly.
+
+    Response schema:
+    ```
+    {
+        "alerting": _alerting,
+        "litellm.callbacks": litellm_callbacks,
+        "litellm.input_callback": litellm_input_callbacks,
+        "litellm.failure_callback": litellm_failure_callbacks,
+        "litellm.success_callback": litellm_success_callbacks,
+        "litellm._async_success_callback": litellm_async_success_callbacks,
+        "litellm._async_failure_callback": litellm_async_failure_callbacks,
+        "litellm._async_input_callback": litellm_async_input_callbacks,
+        "all_litellm_callbacks": all_litellm_callbacks,
+        "num_callbacks": len(all_litellm_callbacks),
+        "num_alerting": _num_alerting,
+        "litellm.request_timeout": litellm.request_timeout,
+    }
+    ```
     """
+
     from litellm.proxy.proxy_server import general_settings, proxy_logging_obj
 
     _alerting = str(general_settings.get("alerting"))
@@ -421,6 +447,7 @@ async def active_callbacks():
         "all_litellm_callbacks": all_litellm_callbacks,
         "num_callbacks": len(all_litellm_callbacks),
         "num_alerting": _num_alerting,
+        "litellm.request_timeout": litellm.request_timeout,
     }
 
 
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index d683bfe09..1fb628a80 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -125,7 +125,7 @@ def is_port_in_use(port):
 )
 @click.option(
     "--request_timeout",
-    default=600,
+    default=6000,
     type=int,
     help="Set timeout in seconds for completion calls",
 )
diff --git a/tests/local_testing/test_proxy_server.py b/tests/local_testing/test_proxy_server.py
index 3dbe417ea..d76894ce6 100644
--- a/tests/local_testing/test_proxy_server.py
+++ b/tests/local_testing/test_proxy_server.py
@@ -173,6 +173,26 @@ def test_chat_completion(mock_acompletion, client_no_auth):
         pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
 
 
+def test_get_settings_request_timeout(client_no_auth):
+    """
+    When no timeout is set, /settings should report the default litellm.request_timeout value
+    """
+    # Import litellm to read the current litellm.request_timeout value
+    import litellm
+
+    # Make a GET request to /settings
+    response = client_no_auth.get("/settings")
+
+    # Check if the request was successful
+    assert response.status_code == 200
+
+    # Parse the JSON response
+    settings = response.json()
+    print("settings", settings)
+
+    assert settings["litellm.request_timeout"] == litellm.request_timeout
+
+
 @pytest.mark.parametrize(
     "litellm_key_header_name",
     ["x-litellm-key", None],
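
To verify the change against a running proxy, here is a minimal sketch of querying the new `/settings` endpoint added by this patch. The proxy URL `http://localhost:4000` and the master key `sk-1234` are illustrative placeholders, not values from the patch; substitute your own deployment's values.

```python
# Minimal sketch: confirm the proxy reports the documented default timeout
# via the new GET /settings endpoint.
# Assumes a proxy running at http://localhost:4000 with a hypothetical
# master key "sk-1234"; adjust both for your setup.
import requests

response = requests.get(
    "http://localhost:4000/settings",
    headers={"Authorization": "Bearer sk-1234"},
)
response.raise_for_status()

settings = response.json()

# With no request_timeout override in the proxy config, this should
# print 6000, matching the documented default.
print(settings["litellm.request_timeout"])
```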