Litellm dev 01 20 2025 p3 (#7890)

* fix(router.py): pass stream timeout correctly for non-OpenAI / Azure models

Fixes https://github.com/BerriAI/litellm/issues/7870

* test(test_router_timeout.py): add test for streaming

* test(test_router_timeout.py): add unit testing for new router functions

* docs(ollama.md): link to section on calling ollama within docker container

* test: remove redundant test

* test: fix test to include timeout value

* docs(config_settings.md): document new router settings param

Commit 94c9f76767 (parent 4c1d4acabc)
Author: Krish Dholakia
Date: 2025-01-20 21:46:36 -08:00
6 changed files with 197 additions and 9 deletions

@@ -73,11 +73,14 @@ def make_sync_call(
     logging_obj,
     streaming_decoder: Optional[CustomStreamingDecoder] = None,
     fake_stream: bool = False,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
 ):
     if client is None:
         client = litellm.module_level_client  # Create a new client if none provided
-    response = client.post(api_base, headers=headers, data=data, stream=not fake_stream)
+    response = client.post(
+        api_base, headers=headers, data=data, stream=not fake_stream, timeout=timeout
+    )
     if response.status_code != 200:
         raise OpenAILikeError(status_code=response.status_code, message=response.read())
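
Note the widened parameter type: timeout is Optional[Union[float, httpx.Timeout]], so callers can pass either a single number or per-phase limits. A small illustration of both forms (values are arbitrary):

    import httpx

    simple = 5.0  # one limit applied to the whole request
    granular = httpx.Timeout(10.0, connect=2.0, read=30.0)  # per-phase limits; read matters most for streams
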
@@ -352,6 +355,7 @@ class OpenAILikeChatHandler(OpenAILikeBase):
                 logging_obj=logging_obj,
                 streaming_decoder=streaming_decoder,
                 fake_stream=fake_stream,
+                timeout=timeout,
             )
             # completion_stream.__iter__()
             return CustomStreamWrapper(
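
End to end, a per-request timeout follows the same path through this handler. A sketch assuming an Ollama model reachable at its default endpoint (the model name is a placeholder):

    import litellm

    stream = litellm.completion(
        model="ollama/llama3",  # placeholder OpenAI-like model
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
        timeout=10,  # with this commit, honored on the streaming request too
    )
    for chunk in stream:
        print(chunk)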