Litellm dev 01 20 2025 p3 (#7890)

* fix(router.py): pass stream timeout correctly for non-OpenAI / Azure models

Fixes https://github.com/BerriAI/litellm/issues/7870

* test(test_router_timeout.py): add test for streaming

* test(test_router_timeout.py): add unit testing for new router functions

* docs(ollama.md): link to section on calling ollama within docker container

* test: remove redundant test

* test: fix test to include timeout value

* docs(config_settings.md): document new router settings param

Commit 94c9f76767 (parent 4c1d4acabc)
Author: Krish Dholakia
Date: 2025-01-20 21:46:36 -08:00
6 changed files with 197 additions and 9 deletions

@@ -73,11 +73,14 @@ def make_sync_call(
     logging_obj,
     streaming_decoder: Optional[CustomStreamingDecoder] = None,
     fake_stream: bool = False,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
 ):
     if client is None:
         client = litellm.module_level_client  # Create a new client if none provided
-    response = client.post(api_base, headers=headers, data=data, stream=not fake_stream)
+    response = client.post(
+        api_base, headers=headers, data=data, stream=not fake_stream, timeout=timeout
+    )
     if response.status_code != 200:
         raise OpenAILikeError(status_code=response.status_code, message=response.read())
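
Note the widened parameter type: timeout is Optional[Union[float, httpx.Timeout]], so callers can pass either a single number or per-phase limits. A small illustration of both forms (values are arbitrary):

    import httpx

    simple = 5.0  # one limit applied to the whole request
    granular = httpx.Timeout(10.0, connect=2.0, read=30.0)  # per-phase limits; read matters most for streams
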
@@ -352,6 +355,7 @@ class OpenAILikeChatHandler(OpenAILikeBase):
                 logging_obj=logging_obj,
                 streaming_decoder=streaming_decoder,
                 fake_stream=fake_stream,
+                timeout=timeout,
             )
             # completion_stream.__iter__()
             return CustomStreamWrapper(
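
End to end, a per-request timeout follows the same path through this handler. A sketch assuming an Ollama model reachable at its default endpoint (the model name is a placeholder):

    import litellm

    stream = litellm.completion(
        model="ollama/llama3",  # placeholder OpenAI-like model
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
        timeout=10,  # with this commit, honored on the streaming request too
    )
    for chunk in stream:
        print(chunk)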