fix(main.py): keep client consistent across calls + exponential backoff retry on ratelimit errors

This commit is contained in:
Krrish Dholakia 2023-11-14 16:25:36 -08:00
parent 5963d9d283
commit a7222f257c
9 changed files with 239 additions and 131 deletions

View file

@ -9,13 +9,25 @@ class BaseLLM:
if litellm.client_session:
_client_session = litellm.client_session
else:
_client_session = httpx.Client(timeout=600)
_client_session = httpx.Client(timeout=litellm.request_timeout)
return _client_session
def create_aclient_session(self):
    """Return the async HTTP client session to use for requests.

    If ``litellm.aclient_session`` is truthy it is returned as-is; otherwise a
    new ``httpx.AsyncClient`` is built with ``litellm.request_timeout`` as its
    timeout.
    """
    shared_session = litellm.aclient_session
    if shared_session:
        # A module-level session was configured; hand it back unchanged.
        return shared_session
    return httpx.AsyncClient(timeout=litellm.request_timeout)
def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
    """Close the synchronous client session stored on this instance, if any.

    The ``with`` statement always calls ``__exit__`` with three exception
    arguments, so they must be accepted here; a signature taking only
    ``self`` raises ``TypeError`` when the ``with`` block is left. The
    arguments default to ``None`` so a direct ``obj.__exit__()`` call keeps
    working.

    Args:
        exc_type: Exception class raised in the ``with`` body, or ``None``.
        exc_val: Exception instance raised in the ``with`` body, or ``None``.
        exc_tb: Traceback of that exception, or ``None``.

    Returns:
        None, so an exception raised inside the ``with`` body is not
        suppressed.
    """
    # NOTE(review): the code visible around this method never assigns
    # ``self._client_session``; confirm where (or whether) it is set.
    if hasattr(self, '_client_session'):
        self._client_session.close()
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Asynchronously close the async client session stored on this instance.

    Does nothing when the instance has no ``_aclient_session`` attribute.
    The exception arguments are accepted but not inspected, and ``None`` is
    returned, so an in-flight exception is not suppressed.
    """
    if not hasattr(self, '_aclient_session'):
        return
    await self._aclient_session.aclose()
def validate_environment(self):
    """Set up the environment required to run the model.

    This implementation is a no-op placeholder and returns ``None``.
    """
    return None