perf(router.py): don't use asyncio.wait_for - just pass the timeout to the completion call

This commit is contained in:
Krrish Dholakia 2024-01-06 17:05:55 +05:30
parent 712f89b4f1
commit 2d8d7e3569

View file

@ -352,18 +352,16 @@ class Router:
else: else:
model_client = potential_model_client model_client = potential_model_client
self.total_calls[model_name] += 1 self.total_calls[model_name] += 1
response = await asyncio.wait_for( response = await litellm.acompletion(
litellm.acompletion(
**{ **{
**data, **data,
"messages": messages, "messages": messages,
"caching": self.cache_responses, "caching": self.cache_responses,
"client": model_client, "client": model_client,
"timeout": self.timeout,
**kwargs, **kwargs,
} }
), )
timeout=self.timeout,
)
self.success_calls[model_name] += 1 self.success_calls[model_name] += 1
return response return response
except Exception as e: except Exception as e:
@ -614,18 +612,16 @@ class Router:
else: else:
model_client = potential_model_client model_client = potential_model_client
self.total_calls[model_name] += 1 self.total_calls[model_name] += 1
response = await asyncio.wait_for( response = await litellm.atext_completion(
litellm.atext_completion(
**{ **{
**data, **data,
"prompt": prompt, "prompt": prompt,
"caching": self.cache_responses, "caching": self.cache_responses,
"client": model_client, "client": model_client,
"timeout": self.timeout,
**kwargs, **kwargs,
} }
), )
timeout=self.timeout,
)
self.success_calls[model_name] += 1 self.success_calls[model_name] += 1
return response return response
except Exception as e: except Exception as e: