Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00
We use the DEFAULT_REPLICATE_ constants for the retry count and the initial delay. If the completion response returns status=processing, we loop to retry. Fixes https://github.com/BerriAI/litellm/issues/7900

Signed-off-by: BJ Hargrave <hargrave@us.ibm.com>
Co-authored-by: BJ Hargrave <bj@hargrave.dev>
This commit is contained in:
parent fe460f19f5
commit b94f60632a

1 changed file with 16 additions and 6 deletions
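Before the hunks, a note on the retry arithmetic: the new delay grows linearly with the retry index. A minimal sketch of the schedule, assuming 5 retries and a 1-second initial delay (illustrative stand-ins; the real values are the litellm module constants DEFAULT_REPLICATE_POLLING_RETRIES and DEFAULT_REPLICATE_POLLING_DELAY_SECONDS):

    # Illustrative stand-ins; litellm defines the actual constants.
    DEFAULT_REPLICATE_POLLING_RETRIES = 5
    DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1

    # Sleep before each poll: initial delay plus 2 extra seconds per prior attempt.
    delays = [
        DEFAULT_REPLICATE_POLLING_DELAY_SECONDS + 2 * retry
        for retry in range(DEFAULT_REPLICATE_POLLING_RETRIES)
    ]
    print(delays)       # [1, 3, 5, 7, 9]
    print(sum(delays))  # 25 -> worst-case total wait in seconds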
@@ -196,11 +196,16 @@ def completion(
         )
         return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate")  # type: ignore
     else:
-        for _ in range(litellm.DEFAULT_MAX_RETRIES):
+        for retry in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
             time.sleep(
-                1
-            )  # wait 1s to allow response to be generated by replicate - else partial output is generated with status=="processing"
+                litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS + 2 * retry
+            )  # wait to allow response to be generated by replicate - else partial output is generated with status=="processing"
             response = httpx_client.get(url=prediction_url, headers=headers)
+            if (
+                response.status_code == 200
+                and response.json().get("status") == "processing"
+            ):
+                continue
             return litellm.ReplicateConfig().transform_response(
                 model=model,
                 raw_response=response,
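Pulled out of the diff, the sync change reduces to a poll-until-done loop. A self-contained sketch under stated assumptions: poll_prediction is a hypothetical helper, the two constants are stand-ins for the litellm defaults, and the raw JSON payload is returned instead of going through ReplicateConfig().transform_response:

    import time

    import httpx

    POLLING_RETRIES = 5        # stand-in for litellm.DEFAULT_REPLICATE_POLLING_RETRIES
    POLLING_DELAY_SECONDS = 1  # stand-in for litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS

    def poll_prediction(prediction_url: str, headers: dict) -> dict:
        """Poll a Replicate prediction URL, retrying while status == 'processing'."""
        response = None
        for retry in range(POLLING_RETRIES):
            # Linear backoff before each poll: 1s, 3s, 5s, ...
            time.sleep(POLLING_DELAY_SECONDS + 2 * retry)
            response = httpx.get(prediction_url, headers=headers)
            if response.status_code == 200 and response.json().get("status") == "processing":
                continue  # output not final yet; poll again with a longer delay
            return response.json()
        # Retries exhausted; surface the last (possibly still-processing) payload.
        return response.json()

Linear rather than exponential backoff keeps the worst case bounded and predictable: with the assumed defaults the loop waits at most 1+3+5+7+9 = 25 seconds before giving up.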
@@ -259,11 +264,16 @@ async def async_completion(
         )
         return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate")  # type: ignore
 
-    for _ in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
+    for retry in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
         await asyncio.sleep(
-            litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS
-        )  # wait 1s to allow response to be generated by replicate - else partial output is generated with status=="processing"
+            litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS + 2 * retry
+        )  # wait to allow response to be generated by replicate - else partial output is generated with status=="processing"
         response = await async_handler.get(url=prediction_url, headers=headers)
+        if (
+            response.status_code == 200
+            and response.json().get("status") == "processing"
+        ):
+            continue
         return litellm.ReplicateConfig().transform_response(
             model=model,
             raw_response=response,
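The async hunk mirrors the same logic with a non-blocking sleep and client. A hedged sketch, again with stand-in constants and a hypothetical helper name; the real code goes through litellm's own async HTTP handler rather than a bare httpx.AsyncClient:

    import asyncio

    import httpx

    async def poll_prediction_async(prediction_url: str, headers: dict) -> dict:
        """Async twin of the polling loop: same backoff, non-blocking sleep and I/O."""
        async with httpx.AsyncClient() as client:
            response = None
            for retry in range(5):  # stand-in for litellm.DEFAULT_REPLICATE_POLLING_RETRIES
                # Same linear backoff as the sync path, without blocking the event loop.
                await asyncio.sleep(1 + 2 * retry)  # 1 = assumed initial delay in seconds
                response = await client.get(prediction_url, headers=headers)
                if response.status_code == 200 and response.json().get("status") == "processing":
                    continue  # still generating; poll again
                return response.json()
            return response.json()  # retries exhausted; return the last payload

From synchronous code this could be driven with, for example, asyncio.run(poll_prediction_async(url, {"Authorization": "Token ..."})).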