fix: fix streaming with httpx client

prevent overwriting streams in parallel streaming calls
2025-04-25 18:54:30 +00:00 · 2024-05-31 10:55:18 -07:00 · 2024-05-31 10:55:18 -07:00 · 93c3635b64
commit 93c3635b64
parent aada7b4bd3
9 changed files with 182 additions and 82 deletions
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -251,7 +251,7 @@ async def async_handle_prediction_response(
    logs = ""
    while True and (status not in ["succeeded", "failed", "canceled"]):
        print_verbose(f"replicate: polling endpoint: {prediction_url}")
-        await asyncio.sleep(0.5)
+        await asyncio.sleep(0.5)  # prevent replicate rate limit errors
        response = await http_handler.get(prediction_url, headers=headers)
        if response.status_code == 200:
            response_data = response.json()