mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
(fix) completion: max_retries using OpenAI client
This commit is contained in:
parent
2bd934e56c
commit
11ec2710c6
4 changed files with 19 additions and 16 deletions
|
@ -136,7 +136,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
elif "stream" in optional_params and optional_params["stream"] == True:
|
||||
return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout)
|
||||
else:
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session, timeout=timeout)
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = azure_client.chat.completions.create(**data) # type: ignore
|
||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||
except AzureOpenAIError as e:
|
||||
|
@ -156,7 +156,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
azure_ad_token: Optional[str]=None, ):
|
||||
response = None
|
||||
try:
|
||||
azure_client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.aclient_session, timeout=timeout)
|
||||
azure_client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.aclient_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = await azure_client.chat.completions.create(**data)
|
||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||
except Exception as e:
|
||||
|
@ -177,7 +177,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
timeout: Any,
|
||||
azure_ad_token: Optional[str]=None,
|
||||
):
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session, timeout=timeout)
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = azure_client.chat.completions.create(**data)
|
||||
streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
|
||||
for transformed_chunk in streamwrapper:
|
||||
|
@ -192,7 +192,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
model: str,
|
||||
timeout: Any,
|
||||
azure_ad_token: Optional[str]=None):
|
||||
azure_client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.aclient_session, timeout=timeout)
|
||||
azure_client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.aclient_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = await azure_client.chat.completions.create(**data)
|
||||
streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
|
||||
async for transformed_chunk in streamwrapper:
|
||||
|
@ -213,12 +213,12 @@ class AzureChatCompletion(BaseLLM):
|
|||
if self._client_session is None:
|
||||
self._client_session = self.create_client_session()
|
||||
try:
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session)
|
||||
data = {
|
||||
"model": model,
|
||||
"input": input,
|
||||
**optional_params
|
||||
}
|
||||
azure_client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=api_base, azure_deployment=model, azure_ad_token=azure_ad_token, http_client=litellm.client_session, max_retries=data.pop("max_retries", 2))
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
|
|
|
@ -207,7 +207,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
elif optional_params.get("stream", False):
|
||||
return self.streaming(logging_obj=logging_obj, data=data, model=model, api_base=api_base, api_key=api_key, timeout=timeout)
|
||||
else:
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout)
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = openai_client.chat.completions.create(**data) # type: ignore
|
||||
logging_obj.post_call(
|
||||
input=None,
|
||||
|
@ -249,7 +249,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
api_base: Optional[str]=None):
|
||||
response = None
|
||||
try:
|
||||
openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout)
|
||||
openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = await openai_aclient.chat.completions.create(**data)
|
||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||
except Exception as e:
|
||||
|
@ -269,7 +269,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
api_key: Optional[str]=None,
|
||||
api_base: Optional[str]=None
|
||||
):
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout)
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = openai_client.chat.completions.create(**data)
|
||||
streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
|
||||
for transformed_chunk in streamwrapper:
|
||||
|
@ -284,7 +284,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
api_base: Optional[str]=None):
|
||||
response = None
|
||||
try:
|
||||
openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout)
|
||||
openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=data.pop("max_retries", 2))
|
||||
response = await openai_aclient.chat.completions.create(**data)
|
||||
streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
|
||||
async for transformed_chunk in streamwrapper:
|
||||
|
@ -309,7 +309,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
super().embedding()
|
||||
exception_mapping_worked = False
|
||||
try:
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session)
|
||||
openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, max_retries=data.pop("max_retries", 2))
|
||||
model = model
|
||||
data = {
|
||||
"model": model,
|
||||
|
|
|
@ -331,8 +331,8 @@ def completion(
|
|||
eos_token = kwargs.get("eos_token", None)
|
||||
acompletion = kwargs.get("acompletion", False)
|
||||
######## end of unpacking kwargs ###########
|
||||
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice"]
|
||||
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "max_retries"]
|
||||
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries"]
|
||||
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self"]
|
||||
default_params = openai_params + litellm_params
|
||||
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
||||
|
||||
|
@ -342,9 +342,9 @@ def completion(
|
|||
timeout = 600 # set timeout for 10 minutes by default
|
||||
timeout = float(timeout)
|
||||
try:
|
||||
if base_url:
|
||||
if base_url is not None:
|
||||
api_base = base_url
|
||||
if max_retries:
|
||||
if max_retries is not None: # openai allows openai.OpenAI(max_retries=3)
|
||||
num_retries = max_retries
|
||||
logging = litellm_logging_obj
|
||||
fallbacks = (
|
||||
|
@ -410,6 +410,7 @@ def completion(
|
|||
seed=seed,
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
max_retries=max_retries,
|
||||
**non_default_params
|
||||
)
|
||||
|
||||
|
|
|
@ -1759,6 +1759,7 @@ def get_optional_params( # use the openai defaults
|
|||
seed=None,
|
||||
tools=None,
|
||||
tool_choice=None,
|
||||
max_retries=None,
|
||||
**kwargs
|
||||
):
|
||||
# retrieve all parameters passed to the function
|
||||
|
@ -1784,7 +1785,8 @@ def get_optional_params( # use the openai defaults
|
|||
"response_format": None,
|
||||
"seed": None,
|
||||
"tools": None,
|
||||
"tool_choice": None
|
||||
"tool_choice": None,
|
||||
"max_retries": None,
|
||||
}
|
||||
# filter out those parameters that were passed with non-default values
|
||||
non_default_params = {k: v for k, v in passed_params.items() if (k != "model" and k != "custom_llm_provider" and k in default_params and v != default_params[k])}
|
||||
|
@ -2178,7 +2180,7 @@ def get_optional_params( # use the openai defaults
|
|||
temperature = 0.0001 # close to 0
|
||||
optional_params["temperature"] = temperature
|
||||
else: # assume passing in params for openai/azure openai
|
||||
supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice"]
|
||||
supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
optional_params = non_default_params
|
||||
# if user passed in non-default kwargs for specific providers/models, pass them along
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue