refactor: complete migration of hard-coded values to litellm.constants (e.g. Replicate polling delay)

2025-04-25 18:54:30 +00:00 · 2025-03-24 19:52:45 -07:00 · 2025-03-24 19:52:45 -07:00 · cb6e9fbe40
commit cb6e9fbe40
parent bfc159172d
32 changed files with 203 additions and 210 deletions
--- a/litellm/llms/replicate/chat/handler.py
+++ b/litellm/llms/replicate/chat/handler.py
@@ -4,6 +4,7 @@ import time
 from typing import Callable, List, Union

 import litellm
+from litellm.constants import REPLICATE_POLLING_DELAY_SECONDS
 from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
@@ -28,7 +29,9 @@ def handle_prediction_response_streaming(

    status = ""
    while True and (status not in ["succeeded", "failed", "canceled"]):
-        time.sleep(0.5)  # prevent being rate limited by replicate
+        time.sleep(
+            REPLICATE_POLLING_DELAY_SECONDS
+        )  # prevent being rate limited by replicate
        print_verbose(f"replicate: polling endpoint: {prediction_url}")
        response = http_client.get(prediction_url, headers=headers)
        if response.status_code == 200:
@@ -77,7 +80,9 @@ async def async_handle_prediction_response_streaming(

    status = ""
    while True and (status not in ["succeeded", "failed", "canceled"]):
-        await asyncio.sleep(0.5)  # prevent being rate limited by replicate
+        await asyncio.sleep(
+            REPLICATE_POLLING_DELAY_SECONDS
+        )  # prevent being rate limited by replicate
        print_verbose(f"replicate: polling endpoint: {prediction_url}")
        response = await http_client.get(prediction_url, headers=headers)
        if response.status_code == 200: