refactor(azure.py): moving azure openai calls to http calls

2025-04-25 10:44:24 +00:00 · 2023-11-08 16:52:18 -08:00 · 2023-11-08 16:52:18 -08:00 · 53abc31c27
commit 53abc31c27
parent 01a7660a12
7 changed files with 309 additions and 78 deletions
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -145,56 +145,6 @@ class OpenAITextCompletionConfig():
                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod)) 
                and v is not None}

-
-class AzureOpenAIConfig(OpenAIConfig):
-    """
-    Reference: https://platform.openai.com/docs/api-reference/chat/create
-
-    The class `AzureOpenAIConfig` provides configuration for the OpenAI's Chat API interface, for use with Azure. It inherits from `OpenAIConfig`. Below are the parameters::
-
-    - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
-
-    - `function_call` (string or object): This optional parameter controls how the model calls functions.
-
-    - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
-
-    - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
-
-    - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
-
-    - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
-
-    - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.
-
-    - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
-
-    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
-
-    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling. 
-    """
-
-    def __init__(self, 
-                 frequency_penalty: Optional[int] = None, 
-                 function_call: Optional[Union[str, dict]]= None, 
-                 functions: Optional[list]= None, 
-                 logit_bias: Optional[dict]= None, 
-                 max_tokens: Optional[int]= None, 
-                 n: Optional[int]= None, 
-                 presence_penalty: Optional[int]= None, 
-                 stop: Optional[Union[str,list]]=None, 
-                 temperature: Optional[int]= None, 
-                 top_p: Optional[int]= None) -> None:
-        super().__init__(frequency_penalty, 
-                         function_call, 
-                         functions, 
-                         logit_bias, 
-                         max_tokens, 
-                         n, 
-                         presence_penalty, 
-                         stop, 
-                         temperature, 
-                         top_p)
-
 class OpenAIChatCompletion(BaseLLM):
    _client_session: requests.Session