Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)
Fix azure max retries error (#8340)
* fix(azure.py): ensure max_retries=0 is respected

  Fixes https://github.com/BerriAI/litellm/issues/6129. A sketch of the pattern involved follows this message.

* fix(test_openai.py): add unit test to ensure openai sdk calls always respect max_retries=0
* test(test_azure_openai.py): add unit testing for the azure_text/ route
* fix(azure.py): fix passing max retries on streaming
* fix(azure.py): fix azure max retries on async completion + streaming
* fix(completion/handler.py): fix azure text async completion + streaming
* test(test_azure_openai.py): ensure azure openai max retries are always respected
* test(test_azure_o_series.py): add testing to ensure max retries are always respected
* Added gemini providers for 2.0-flash and 2.0-flash-lite (#8321)
  * Update model_prices_and_context_window.json: added gemini providers for 2.0-flash and 2.0-flash-lite
  * Update model_prices_and_context_window.json: fixed URL
* Convert tool use arguments to string before counting tokens (#6989)

  In at least some cases, `messages["tool_calls"]["function"]["arguments"]` is a dict, not a string. To tokenize it properly, it needs to be a string. If it is already a string, the conversion is a no-op, which is also fine (see the second sketch below).

* build(model_prices_and_context_window.json): add gemini 2.0 flash lite pricing
* build(model_prices_and_context_window.json): add gemini commercial rate limits
* fix(utils.py): fix linting error
* refactor(utils.py): refactor to maintain function size

Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: Bardia Khosravi <bardiakhosravi95@gmail.com>
Co-authored-by: Josh Morrow <josh@jcmorrow.com>
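To make the core azure fix concrete, here is a minimal Python sketch, not the actual litellm code: the function name and parameter plumbing are assumptions for illustration; only `AsyncAzureOpenAI` and its `max_retries` constructor argument come from the openai SDK. Per the diff below, the old handler recovered `max_retries` from the request dict via `data.pop("max_retries", 2)`, so an explicit `max_retries=0` ("disable SDK retries") could be silently replaced by the default of 2 when the key never made it into `data`; the fix threads the value through as an explicit argument instead.

```python
import os

from openai import AsyncAzureOpenAI  # real class and kwargs from the openai SDK


async def azure_text_acompletion(data: dict, max_retries: int):
    # Hypothetical handler: max_retries is now an explicit parameter instead
    # of being recovered via data.pop("max_retries", 2), so a caller's 0
    # survives all the way to the client.
    if not isinstance(max_retries, int):
        raise ValueError("max retries must be an int")
    client = AsyncAzureOpenAI(
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        api_version="2024-02-01",
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        max_retries=max_retries,  # 0 is a valid value and is respected
    )
    return await client.completions.create(**data)
```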
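The tool-argument fix (#6989) is equally small: serialize dict-valued arguments before tokenizing. A sketch with a hypothetical helper name; as the commit message notes, a value that is already a string passes through untouched.

```python
import json


def tool_call_arguments_as_text(arguments) -> str:
    # messages["tool_calls"][i]["function"]["arguments"] can be a dict in
    # some responses; tokenizers expect text, so serialize it first.
    if isinstance(arguments, str):
        return arguments  # already a string: no-op
    return json.dumps(arguments)
```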
Parent: d720744656
Commit: 6b8b49451f
7 changed files with 176 additions and 28 deletions
@@ -131,6 +131,7 @@ class AzureTextCompletion(BaseLLM):
                 timeout=timeout,
                 client=client,
                 logging_obj=logging_obj,
+                max_retries=max_retries,
             )
         elif "stream" in optional_params and optional_params["stream"] is True:
             return self.streaming(
@@ -236,17 +237,12 @@ class AzureTextCompletion(BaseLLM):
         timeout: Any,
         model_response: ModelResponse,
         logging_obj: Any,
+        max_retries: int,
         azure_ad_token: Optional[str] = None,
         client=None,  # this is the AsyncAzureOpenAI
     ):
         response = None
         try:
-            max_retries = data.pop("max_retries", 2)
-            if not isinstance(max_retries, int):
-                raise AzureOpenAIError(
-                    status_code=422, message="max retries must be an int"
-                )
-
             # init AzureOpenAI Client
             azure_client_params = {
                 "api_version": api_version,