fix(router.py): fix client init for streaming timeouts

parent f0c4ff6e60 · commit 695eb129ad

4 changed files with 29 additions and 7 deletions
```diff
@@ -609,6 +609,7 @@ def completion(
         "cache",
         "no-log",
         "base_model",
+        "stream_timeout",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
```
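To see why this one-line change matters: `completion()` merges the OpenAI param names with the litellm-internal ones into `default_params`, and any kwarg outside that set lands in `non_default_params`, which is forwarded to the model provider. Without this entry, a `stream_timeout` kwarg would leak into the provider request instead of staying router-internal. A minimal sketch, with abbreviated lists and a hypothetical helper (not litellm's actual code):

```python
# Illustrative only: abbreviated param lists and a hypothetical helper.
openai_params = ["model", "messages", "temperature", "stream", "timeout"]  # abbreviated
litellm_params = ["cache", "no-log", "base_model", "stream_timeout"]  # with the new entry

def provider_kwargs(kwargs: dict) -> dict:
    """Collect the kwargs that completion() would forward to the provider."""
    default_params = set(openai_params + litellm_params)
    # anything litellm does not recognize is treated as a provider param
    return {k: v for k, v in kwargs.items() if k not in default_params}

print(provider_kwargs({"stream_timeout": 10, "vendor_flag": 1}))
# -> {'vendor_flag': 1}  ("stream_timeout" now stays litellm-internal)
```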
```diff
@@ -4,6 +4,14 @@ model_list:
       model: openai/my-fake-model
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+      stream_timeout: 0.001
+  - litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+      api_version: "2023-07-01-preview"
+      stream_timeout: 0.001
+    model_name: azure-gpt-3.5
   - model_name: gpt-instruct
     litellm_params:
       model: gpt-3.5-turbo-instruct
```
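With these deployments configured at `stream_timeout: 0.001`, any streamed response through the proxy should fail almost immediately while non-streaming calls are unaffected. A hedged smoke test, assuming the proxy runs locally on the default port and that the timeout surfaces to the client as an API error:

```python
# Assumptions: proxy at http://0.0.0.0:4000, model name matching the config above.
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

try:
    stream = client.chat.completions.create(
        model="azure-gpt-3.5",  # deployment above, stream_timeout: 0.001
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
    )
    for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")
except openai.APIError as e:
    # a 1 ms stream timeout should trip before the first chunk arrives
    print("streaming call failed fast, as intended:", e)
```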
```diff
@@ -252,24 +252,31 @@ def test_stream_timeouts_router():
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "timeout": 200,  # regular calls will not timeout, stream calls will
-                    "stream_timeout": 0.000_001,
+                    "stream_timeout": 10,
                 },
             },
         ]
         router = Router(model_list=model_list)

         print("PASSED !")
+        data = {
+            "model": "gpt-3.5-turbo",
+            "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
+            "stream": True,
+        }
         selected_client = router._get_client(
             deployment=router.model_list[0],
-            kwargs={
-                "model": "gpt-3.5-turbo",
-                "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
-                "stream": True,
-            },
+            kwargs=data,
             client_type=None,
         )
         print("Select client timeout", selected_client.timeout)
-        assert selected_client.timeout == 0.000_001
+        assert selected_client.timeout == 10
+
+        # make actual call
+        response = router.completion(**data)
+
+        for chunk in response:
+            print(f"chunk: {chunk}")
     except openai.APITimeoutError as e:
         print(
             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
```
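The updated assertion pins down what the fix is about: for `stream=True` kwargs, `_get_client` must return the client that was initialized with `stream_timeout` (10s here), not the one built with the regular 200s `timeout`. A simplified sketch of that selection logic; the four-clients-per-deployment layout and the cache-key format are assumptions, not litellm's exact code:

```python
from typing import Optional

def pick_client_key(model_id: str, kwargs: dict, client_type: Optional[str] = None) -> str:
    """Choose which pre-initialized client a call should use (hypothetical keys).

    Streaming calls map to clients built with stream_timeout; non-streaming
    calls map to clients built with the regular timeout.
    """
    is_async = client_type == "async" or bool(kwargs.get("acompletion"))
    is_stream = bool(kwargs.get("stream"))
    suffix = {
        (False, False): "client",
        (False, True): "stream_client",
        (True, False): "async_client",
        (True, True): "stream_async_client",
    }[(is_async, is_stream)]
    return f"{model_id}_{suffix}"

print(pick_client_key("abc123", {"stream": True}))  # -> abc123_stream_client
```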
```diff
@@ -9,12 +9,18 @@ model_list:
     litellm_params:
       model: "gpt-3.5-turbo-1106"
       api_key: os.environ/OPENAI_API_KEY
+      rpm: 480
+      timeout: 300
+      stream_timeout: 60
   - model_name: gpt-4
     litellm_params:
       model: azure/chatgpt-v-2
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
+      rpm: 480
+      timeout: 300
+      stream_timeout: 60
   - model_name: sagemaker-completion-model
     litellm_params:
       model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
```
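A quick sanity check that a config like this parses and that every deployment carries both timeouts; the file name `proxy_config.yaml` is an assumption:

```python
# Requires: pip install pyyaml
import yaml

with open("proxy_config.yaml") as f:
    config = yaml.safe_load(f)

for entry in config["model_list"]:
    params = entry["litellm_params"]
    print(
        f"{entry['model_name']}: timeout={params.get('timeout')} "
        f"stream_timeout={params.get('stream_timeout')}"
    )
```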