fix(main.py): keep client consistent across calls + exponential backoff retry on ratelimit errors

This commit is contained in:
Krrish Dholakia 2023-11-14 16:25:36 -08:00
parent 5963d9d283
commit a7222f257c
9 changed files with 239 additions and 131 deletions

View file

@@ -6,11 +6,14 @@ import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, Usage
import litellm
import httpx
class VertexAIError(Exception):
    """Exception describing a failed Vertex AI call.

    Besides the status code and message, each instance carries a locally
    constructed ``httpx.Request`` / ``httpx.Response`` pair on ``request``
    and ``response``. Building these objects performs no network I/O.

    Args:
        status_code: Status code for the failure; it is also passed as the
            ``status_code`` of the attached ``httpx.Response``.
        message: Error description; it is also used as the exception message.
    """

    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # Placeholder request only: it identifies Vertex AI as the provider.
        # It previously pointed at the AI21 endpoint, which mislabelled the
        # origin of the error for anything inspecting ``request.url``.
        self.request = httpx.Request(
            method="POST", url="https://cloud.google.com/vertex-ai/"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs