mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
fix(router.py): fix cooldown logic for usage-based-routing-v2 pre-call-checks
This commit is contained in:
parent
f3c37f487a
commit
e49325b234
3 changed files with 45 additions and 10 deletions
|
@@ -430,6 +430,10 @@ def mock_completion(
|
|||
model=model, # type: ignore
|
||||
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
|
||||
)
|
||||
time_delay = kwargs.get("mock_delay", None)
|
||||
if time_delay is not None:
|
||||
time.sleep(time_delay)
|
||||
|
||||
model_response = ModelResponse(stream=stream)
|
||||
if stream is True:
|
||||
# don't try to access stream object,
|
||||
|
@@ -880,6 +884,7 @@ def completion(
|
|||
mock_response=mock_response,
|
||||
logging=logging,
|
||||
acompletion=acompletion,
|
||||
mock_delay=kwargs.get("mock_delay", None),
|
||||
)
|
||||
if custom_llm_provider == "azure":
|
||||
# azure configs
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue