feat(proxy_server.py): enable batch completion fastest response calls on proxy
Introduces a new `fastest_response` flag for enabling the call.
Parent: ecd182eb6a
Commit: 20106715d5
3 changed files with 32 additions and 3 deletions
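
Before the diff, here is a hedged sketch of how a client could exercise the new behaviour once the proxy is running. The base URL, API key, and model names are illustrative assumptions, not values taken from this commit, and the comma-separated model list plus the `extra_body` field are assumed conventions for routing a batched call; the only part this commit itself introduces is the `fastest_response` flag.

import openai

# Point the standard OpenAI client at a locally running LiteLLM proxy.
# The URL and key below are placeholders for illustration.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    # Assumed comma-separated model list: the proxy fans the request out to each one.
    model="gpt-4o, claude-3-5-sonnet",
    messages=[{"role": "user", "content": "ping"}],
    # The flag introduced by this commit, passed as an extra request-body field.
    extra_body={"fastest_response": True},
)
print(response)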
@@ -804,9 +804,16 @@ class Router:
             pending_tasks.append(task)
 
         responses = await asyncio.gather(*_tasks, return_exceptions=True)
-        if isinstance(responses[0], Exception):
+        if isinstance(responses[0], Exception) or isinstance(
+            responses[0], BaseException
+        ):
             raise responses[0]
-        return responses[0]  # return first value from list
+        _response: Union[ModelResponse, CustomStreamWrapper] = responses[
+            0
+        ]  # return first value from list
+
+        _response._hidden_params["fastest_response_batch_completion"] = True
+        return _response
 
     def image_generation(self, prompt: str, model: str, **kwargs):
         try:
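
The hunk above makes the fastest-response path tolerate `BaseException` results from `asyncio.gather` and tags the winning response via `_hidden_params["fastest_response_batch_completion"] = True`, so callers can tell it came from a batched race. As a rough, self-contained sketch of the general fan-out-and-take-the-first pattern (not the actual Router implementation; helper names such as `call_deployment` are made up for illustration):

import asyncio
from typing import List, Tuple


async def call_deployment(name: str, delay: float) -> str:
    # Stand-in for a real completion call against one deployment.
    await asyncio.sleep(delay)
    return f"response from {name}"


async def fastest_response(deployments: List[Tuple[str, float]]) -> str:
    # Launch the same request against every deployment concurrently.
    tasks = [asyncio.create_task(call_deployment(n, d)) for n, d in deployments]
    # Wait only until the first task finishes, then drop the slower ones.
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    for task in pending:
        task.cancel()
    winner = done.pop()
    exc = winner.exception()
    if exc is not None:
        raise exc
    return winner.result()


if __name__ == "__main__":
    print(asyncio.run(fastest_response([("a", 0.3), ("b", 0.1), ("c", 0.2)])))
    # -> "response from b": the quickest deployment wins the race.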