fix(router.py): fix cooldown logic for usage-based-routing-v2 pre-call-checks

This commit is contained in:
Krrish Dholakia 2024-05-31 21:32:01 -07:00
parent f3c37f487a
commit e49325b234
3 changed files with 45 additions and 10 deletions

View file

@@ -430,6 +430,10 @@ def mock_completion(
model=model, # type: ignore
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
)
time_delay = kwargs.get("mock_delay", None)
if time_delay is not None:
time.sleep(time_delay)
model_response = ModelResponse(stream=stream)
if stream is True:
# don't try to access stream object,
@@ -880,6 +884,7 @@ def completion(
mock_response=mock_response,
logging=logging,
acompletion=acompletion,
mock_delay=kwargs.get("mock_delay", None),
)
if custom_llm_provider == "azure":
# azure configs