Merge pull request #5358 from BerriAI/litellm_fix_retry_after

Fix retry-after handling — cool down individual models based on their specific 'retry-after' header
This commit is contained in:
Krish Dholakia 2024-08-27 11:50:14 -07:00 committed by GitHub
commit 415abc86c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 754 additions and 202 deletions

View file

@@ -452,7 +452,12 @@ async def _async_streaming(response, model, custom_llm_provider, args):
print_verbose(f"line in async streaming: {line}")
yield line
except Exception as e:
raise e
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
)
def mock_completion(
@@ -3765,7 +3770,7 @@ async def atext_completion(
else:
# Call the synchronous function using run_in_executor
response = await loop.run_in_executor(None, func_with_context)
if kwargs.get("stream", False) == True: # return an async generator
if kwargs.get("stream", False) is True: # return an async generator
return TextCompletionStreamWrapper(
completion_stream=_async_streaming(
response=response,
@@ -3774,6 +3779,7 @@ async def atext_completion(
args=args,
),
model=model,
custom_llm_provider=custom_llm_provider,
)
else:
transformed_logprobs = None
@@ -4047,11 +4053,14 @@ def text_completion(
**kwargs,
**optional_params,
)
if kwargs.get("acompletion", False) == True:
if kwargs.get("acompletion", False) is True:
return response
if stream == True or kwargs.get("stream", False) == True:
if stream is True or kwargs.get("stream", False) is True:
response = TextCompletionStreamWrapper(
completion_stream=response, model=model, stream_options=stream_options
completion_stream=response,
model=model,
stream_options=stream_options,
custom_llm_provider=custom_llm_provider,
)
return response
transformed_logprobs = None