fix(main.py): keep client consistent across calls + exponential backoff retry on ratelimit errors

2025-04-25 18:54:30 +00:00 · 2023-11-14 16:25:36 -08:00 · 2023-11-14 16:25:36 -08:00 · a7222f257c
commit a7222f257c
parent 5963d9d283
9 changed files with 239 additions and 131 deletions
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -6,11 +6,14 @@ import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
 import litellm
+import httpx

 class VertexAIError(Exception):
     """Exception for Vertex AI failures that carries an HTTP-style status code.
     Attributes:
         status_code: Status code passed in by the caller.
         message: Error text; also passed to ``Exception.__init__``.
         request: Synthetic ``httpx.Request`` built locally (never sent).
         response: Synthetic ``httpx.Response`` carrying ``status_code`` and
             linked to ``request``.
     """
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
         # Placeholder request: it exists only so the Response below has a
         # request attached. The URL names Vertex AI so that anything reading
         # `err.request.url` sees the right provider rather than another
         # vendor's endpoint.
         # NOTE(review): presumably consumed by shared exception-mapping code
         # that expects `.request` / `.response` on provider errors — confirm.
         self.request = httpx.Request(
             method="POST", url="https://cloud.google.com/vertex-ai/"
         )
         self.response = httpx.Response(status_code=status_code, request=self.request)
         # Call the base class constructor with the parameters it needs
         super().__init__(self.message)