refactor: complete migration

This commit is contained in:
Krrish Dholakia 2025-03-24 19:52:45 -07:00
parent bfc159172d
commit cb6e9fbe40
32 changed files with 203 additions and 210 deletions

View file

@@ -4,6 +4,7 @@ import time
from typing import Callable, List, Union
import litellm
+from litellm.constants import REPLICATE_POLLING_DELAY_SECONDS
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
@@ -28,7 +29,9 @@ def handle_prediction_response_streaming(
status = ""
while True and (status not in ["succeeded", "failed", "canceled"]):
-time.sleep(0.5) # prevent being rate limited by replicate
+time.sleep(
+REPLICATE_POLLING_DELAY_SECONDS
+) # prevent being rate limited by replicate
print_verbose(f"replicate: polling endpoint: {prediction_url}")
response = http_client.get(prediction_url, headers=headers)
if response.status_code == 200:
@@ -77,7 +80,9 @@ async def async_handle_prediction_response_streaming(
status = ""
while True and (status not in ["succeeded", "failed", "canceled"]):
-await asyncio.sleep(0.5) # prevent being rate limited by replicate
+await asyncio.sleep(
+REPLICATE_POLLING_DELAY_SECONDS
+) # prevent being rate limited by replicate
print_verbose(f"replicate: polling endpoint: {prediction_url}")
response = await http_client.get(prediction_url, headers=headers)
if response.status_code == 200: