From 6252987798dc72b6bf3c1382b12e10573fdaa29a Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 30 Dec 2023 11:17:31 +0530
Subject: [PATCH] (docs) proxy - set timeout per request

---
 docs/my-website/docs/proxy/reliability.md | 52 +++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md
index 75f43bcdc..269fbf607 100644
--- a/docs/my-website/docs/proxy/reliability.md
+++ b/docs/my-website/docs/proxy/reliability.md
@@ -1,3 +1,7 @@
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Fallbacks, Retries, Timeouts, Cooldowns
 
 If a call fails after num_retries, fall back to another model group.
@@ -87,3 +91,51 @@ model_list:
 
 $ litellm --config /path/to/config.yaml
 ```
+
+## Setting Dynamic Timeouts - Per Request
+
+LiteLLM Proxy supports setting a `timeout` per request
+
+**Example Usage**
+<Tabs>
+<TabItem value="Curl" label="Curl Request">
+
+```shell
+curl --location 'http://0.0.0.0:8000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --data-raw '{
+        "model": "gpt-3.5-turbo",
+        "messages": [
+            {"role": "user", "content": "what color is red"}
+        ],
+        "logit_bias": {"12481": 100},
+        "timeout": 1
+   }'
+```
+</TabItem>
+<TabItem value="openai" label="OpenAI v1.0.0+">
+
+```python
+import openai
+
+
+client = openai.OpenAI(
+    api_key="anything",
+    base_url="http://0.0.0.0:8000"
+)
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "user", "content": "what color is red"}
+    ],
+    logit_bias={12481: 100},
+    timeout=1
+)
+
+print(response)
+
+
+```
+</TabItem>
+</Tabs>