diff --git a/litellm/router.py b/litellm/router.py index 940e3c90d..0688dc61f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -285,7 +285,7 @@ class Router: "messages": messages, "functions": functions, "function_call": function_call, - "timeout": timeout, + "timeout": timeout or self.timeout, "temperature": temperature, "top_p": top_p, "n": n, @@ -316,7 +316,7 @@ class Router: future = executor.submit( self.function_with_fallbacks, **kwargs, **completion_kwargs ) - response = future.result(timeout=timeout) # type: ignore + response = future.result() # type: ignore return response except Exception as e: @@ -417,7 +417,7 @@ class Router: "messages": messages, "functions": functions, "function_call": function_call, - "timeout": timeout, + "timeout": timeout or self.timeout, "temperature": temperature, "top_p": top_p, "n": n, @@ -442,7 +442,6 @@ class Router: "original_function": self._acompletion, } kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries) - timeout = kwargs.get("request_timeout", self.timeout) kwargs.setdefault("metadata", {}).update({"model_group": model}) response = await self.async_function_with_fallbacks( diff --git a/litellm/tests/test_timeout.py b/litellm/tests/test_timeout.py index 84aa7537f..1902e1a36 100644 --- a/litellm/tests/test_timeout.py +++ b/litellm/tests/test_timeout.py @@ -39,6 +39,8 @@ def test_timeout(): def test_hanging_request_azure(): litellm.set_verbose = True + import asyncio + try: router = litellm.Router( model_list=[ @@ -58,13 +60,20 @@ def test_hanging_request_azure(): ) encoded = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0] - response = router.completion( - model="azure-gpt", - messages=[{"role": "user", "content": f"what color is red {uuid.uuid4()}"}], - logit_bias={encoded: 100}, - timeout=0.01, - ) - print(response) + + async def _test(): + response = await router.acompletion( + model="azure-gpt", + messages=[ + {"role": "user", "content": f"what color is red {uuid.uuid4()}"} + ], + logit_bias={encoded: 100}, + timeout=0.01, + ) + print(response) + return response + + response = asyncio.run(_test()) if response.choices[0].message.content is not None: pytest.fail("Got a response, expected a timeout")