LiteLLM Minor Fixes and Improvements (09/13/2024) (#5689)
* refactor: cleanup unused variables + fix pyright errors
* feat(health_check.py): Closes https://github.com/BerriAI/litellm/issues/5686
* fix(o1_reasoning.py): add stricter check for o-1 reasoning model
* refactor(mistral/): make it easier to see mistral transformation logic
* fix(openai.py): fix openai o-1 model param mapping. Fixes https://github.com/BerriAI/litellm/issues/5685 (see the sketch below)
* feat(main.py): infer finetuned gemini model from base model. Fixes https://github.com/BerriAI/litellm/issues/5678
* docs(vertex.md): update docs to call finetuned gemini models
* feat(proxy_server.py): allow admin to hide proxy model aliases. Closes https://github.com/BerriAI/litellm/issues/5692
* docs(load_balancing.md): add docs on hiding alias models from proxy config
* fix(base.py): don't raise notimplemented error
* fix(user_api_key_auth.py): fix model max budget check
* fix(router.py): fix elif
* fix(user_api_key_auth.py): don't set team_id to empty str
* fix(team_endpoints.py): fix response type
* test(test_completion.py): handle predibase error
* test(test_proxy_server.py): fix test
* fix(o1_transformation.py): fix max_completion_token mapping
* test(test_image_generation.py): mark flaky test
parent 60c5d3ebec
commit 713d762411

35 changed files with 1020 additions and 539 deletions
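The o-1 fixes listed above change how OpenAI-style parameters are mapped for reasoning models, which take max_completion_tokens rather than max_tokens and reject the usual sampling parameters. The sketch below is illustrative only; the function name and the exact set of dropped parameters are assumptions, not litellm's o1_transformation.py.

```python
# Illustrative sketch of o1-style param mapping; the name and the dropped-param
# set are assumptions, not litellm's actual o1_transformation.py.
def map_openai_params_for_o1(non_default_params: dict, optional_params: dict) -> dict:
    dropped = {"temperature", "top_p", "presence_penalty", "frequency_penalty"}
    for param, value in non_default_params.items():
        if param == "max_tokens":
            # o1-style models expect max_completion_tokens instead of max_tokens
            optional_params["max_completion_tokens"] = value
        elif param in dropped:
            continue  # params the reasoning models do not accept are dropped
        else:
            optional_params[param] = value
    return optional_params


if __name__ == "__main__":
    print(map_openai_params_for_o1({"max_tokens": 256, "temperature": 0.2}, {}))
    # {'max_completion_tokens': 256}
```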
@@ -60,122 +60,6 @@ class OpenAIError(Exception):
         )  # Call the base class constructor with the parameters it needs


-class MistralConfig:
-    """
-    Reference: https://docs.mistral.ai/api/
-
-    The class `MistralConfig` provides configuration for the Mistral's Chat API interface. Below are the parameters:
-
-    - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2. API Default - 0.7.
-
-    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling. API Default - 1.
-
-    - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. API Default - null.
-
-    - `tools` (list or null): A list of available tools for the model. Use this to specify functions for which the model can generate JSON inputs.
-
-    - `tool_choice` (string - 'auto'/'any'/'none' or null): Specifies if/how functions are called. If set to none the model won't call a function and will generate a message instead. If set to auto the model can choose to either generate a message or call a function. If set to any the model is forced to call a function. Default - 'auto'.
-
-    - `stop` (string or array of strings): Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
-
-    - `random_seed` (integer or null): The seed to use for random sampling. If set, different calls will generate deterministic results.
-
-    - `safe_prompt` (boolean): Whether to inject a safety prompt before all conversations. API Default - 'false'.
-
-    - `response_format` (object or null): An object specifying the format that the model must output. Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message.
-    """
-
-    temperature: Optional[int] = None
-    top_p: Optional[int] = None
-    max_tokens: Optional[int] = None
-    tools: Optional[list] = None
-    tool_choice: Optional[Literal["auto", "any", "none"]] = None
-    random_seed: Optional[int] = None
-    safe_prompt: Optional[bool] = None
-    response_format: Optional[dict] = None
-    stop: Optional[Union[str, list]] = None
-
-    def __init__(
-        self,
-        temperature: Optional[int] = None,
-        top_p: Optional[int] = None,
-        max_tokens: Optional[int] = None,
-        tools: Optional[list] = None,
-        tool_choice: Optional[Literal["auto", "any", "none"]] = None,
-        random_seed: Optional[int] = None,
-        safe_prompt: Optional[bool] = None,
-        response_format: Optional[dict] = None,
-        stop: Optional[Union[str, list]] = None,
-    ) -> None:
-        locals_ = locals().copy()
-        for key, value in locals_.items():
-            if key != "self" and value is not None:
-                setattr(self.__class__, key, value)
-
-    @classmethod
-    def get_config(cls):
-        return {
-            k: v
-            for k, v in cls.__dict__.items()
-            if not k.startswith("__")
-            and not isinstance(
-                v,
-                (
-                    types.FunctionType,
-                    types.BuiltinFunctionType,
-                    classmethod,
-                    staticmethod,
-                ),
-            )
-            and v is not None
-        }
-
-    def get_supported_openai_params(self):
-        return [
-            "stream",
-            "temperature",
-            "top_p",
-            "max_tokens",
-            "tools",
-            "tool_choice",
-            "seed",
-            "stop",
-            "response_format",
-        ]
-
-    def _map_tool_choice(self, tool_choice: str) -> str:
-        if tool_choice == "auto" or tool_choice == "none":
-            return tool_choice
-        elif tool_choice == "required":
-            return "any"
-        else:  # openai 'tool_choice' object param not supported by Mistral API
-            return "any"
-
-    def map_openai_params(self, non_default_params: dict, optional_params: dict):
-        for param, value in non_default_params.items():
-            if param == "max_tokens":
-                optional_params["max_tokens"] = value
-            if param == "tools":
-                optional_params["tools"] = value
-            if param == "stream" and value is True:
-                optional_params["stream"] = value
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if param == "stop":
-                optional_params["stop"] = value
-            if param == "tool_choice" and isinstance(value, str):
-                optional_params["tool_choice"] = self._map_tool_choice(
-                    tool_choice=value
-                )
-            if param == "seed":
-                optional_params["extra_body"] = {"random_seed": value}
-            if param == "response_format":
-                optional_params["response_format"] = value
-        return optional_params
-
-
 class MistralEmbeddingConfig:
     """
     Reference: https://docs.mistral.ai/api/#operation/createEmbedding
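For reference, the tool_choice translation inside the MistralConfig block removed above (the class now lives under the mistral/ directory, per the commit message) reduces to a small rule: OpenAI's "required" becomes Mistral's "any", and unsupported object-valued choices also fall back to "any". A self-contained restatement of that rule, copied from the diff rather than imported from litellm:

```python
# Self-contained restatement of MistralConfig._map_tool_choice from the removed
# block above; behavior copied from the diff, not imported from litellm.
def map_tool_choice(tool_choice: str) -> str:
    if tool_choice in ("auto", "none"):
        return tool_choice
    return "any"  # "required" (and anything unsupported) forces a tool call


assert map_tool_choice("required") == "any"
assert map_tool_choice("auto") == "auto"
```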
@@ -526,44 +410,19 @@ class OpenAIConfig:
         }

     def get_supported_openai_params(self, model: str) -> list:
-        base_params = [
-            "frequency_penalty",
-            "logit_bias",
-            "logprobs",
-            "top_logprobs",
-            "max_tokens",
-            "n",
-            "presence_penalty",
-            "seed",
-            "stop",
-            "stream",
-            "stream_options",
-            "temperature",
-            "top_p",
-            "tools",
-            "tool_choice",
-            "function_call",
-            "functions",
-            "max_retries",
-            "extra_headers",
-            "parallel_tool_calls",
-        ]  # works across all models
-
-        model_specific_params = []
         if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):
             return litellm.OpenAIO1Config().get_supported_openai_params(model=model)
-        if (
-            model != "gpt-3.5-turbo-16k" and model != "gpt-4"
-        ):  # gpt-4 does not support 'response_format'
-            model_specific_params.append("response_format")
+        else:
+            return litellm.OpenAIGPTConfig().get_supported_openai_params(model=model)

-        if (
-            model in litellm.open_ai_chat_completion_models
-        ) or model in litellm.open_ai_text_completion_models:
-            model_specific_params.append(
-                "user"
-            )  # user is not a param supported by all openai-compatible endpoints - e.g. azure ai
-        return base_params + model_specific_params
+    def _map_openai_params(
+        self, non_default_params: dict, optional_params: dict, model: str
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model)
+        for param, value in non_default_params.items():
+            if param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params

     def map_openai_params(
         self, non_default_params: dict, optional_params: dict, model: str
@@ -575,11 +434,11 @@ class OpenAIConfig:
                 optional_params=optional_params,
                 model=model,
             )
-        supported_openai_params = self.get_supported_openai_params(model)
-        for param, value in non_default_params.items():
-            if param in supported_openai_params:
-                optional_params[param] = value
-        return optional_params
+        return litellm.OpenAIGPTConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+        )


 class OpenAITextCompletionConfig:
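The two OpenAIConfig hunks above replace inline parameter lists with delegation to per-model-family configs. The sketch below illustrates that dispatch pattern only; OpenAIO1Params and OpenAIGPTParams are stand-in names, not litellm classes, and their parameter lists are placeholders.

```python
# Stand-in classes illustrating the delegation pattern; not litellm's API.
class OpenAIGPTParams:
    def get_supported_openai_params(self, model: str) -> list:
        return ["temperature", "top_p", "max_tokens", "tools", "tool_choice", "stream"]


class OpenAIO1Params:
    def is_model_o1_reasoning_model(self, model: str) -> bool:
        return model.startswith("o1")

    def get_supported_openai_params(self, model: str) -> list:
        # reasoning models expose a narrower parameter surface
        return ["max_completion_tokens", "tools", "tool_choice", "stream"]


def get_supported_openai_params(model: str) -> list:
    """Route to the o1 config for o1 models, else fall back to the GPT config."""
    o1 = OpenAIO1Params()
    if o1.is_model_o1_reasoning_model(model=model):
        return o1.get_supported_openai_params(model=model)
    return OpenAIGPTParams().get_supported_openai_params(model=model)


print(get_supported_openai_params("o1-preview"))
print(get_supported_openai_params("gpt-4o-mini"))
```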
@@ -816,18 +675,18 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             raise e

-    def completion(
+    def completion(  # type: ignore
         self,
         model_response: ModelResponse,
         timeout: Union[float, httpx.Timeout],
+        optional_params: dict,
+        logging_obj: Any,
         model: Optional[str] = None,
         messages: Optional[list] = None,
         print_verbose: Optional[Callable] = None,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         acompletion: bool = False,
-        logging_obj=None,
         litellm_params=None,
         logger_fn=None,
         headers: Optional[dict] = None,
@@ -858,14 +717,14 @@ class OpenAIChatCompletion(BaseLLM):
             # process all OpenAI compatible provider logic here
             if custom_llm_provider == "mistral":
                 # check if message content passed in as list, and not string
-                messages = prompt_factory(
+                messages = prompt_factory(  # type: ignore
                     model=model,
                     messages=messages,
                     custom_llm_provider=custom_llm_provider,
                 )
             if custom_llm_provider == "perplexity" and messages is not None:
                 # check if messages.name is passed + supported, if not supported remove
-                messages = prompt_factory(
+                messages = prompt_factory(  # type: ignore
                     model=model,
                     messages=messages,
                     custom_llm_provider=custom_llm_provider,
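The hunk above routes Mistral and Perplexity messages through prompt_factory, whose job here includes normalizing list-style message content into plain strings. The snippet below sketches that kind of normalization; it is not litellm's prompt_factory, just an illustration of the idea.

```python
# Illustration of flattening list-style content; litellm's prompt_factory does more.
def flatten_content(messages: list) -> list:
    out = []
    for m in messages:
        content = m.get("content")
        if isinstance(content, list):  # e.g. [{"type": "text", "text": "hi"}]
            content = "".join(
                part.get("text", "") for part in content if isinstance(part, dict)
            )
        out.append({**m, "content": content})
    return out


print(flatten_content([{"role": "user", "content": [{"type": "text", "text": "hi"}]}]))
# [{'role': 'user', 'content': 'hi'}]
```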
@@ -933,7 +792,7 @@ class OpenAIChatCompletion(BaseLLM):
                         status_code=422, message="max retries must be an int"
                     )

-                openai_client = self._get_openai_client(
+                openai_client: OpenAI = self._get_openai_client(  # type: ignore
                     is_async=False,
                     api_key=api_key,
                     api_base=api_base,
@@ -1068,7 +927,7 @@ class OpenAIChatCompletion(BaseLLM):
             2
         ):  # if call fails due to alternating messages, retry with reformatted message
             try:
-                openai_aclient = self._get_openai_client(
+                openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                     is_async=True,
                     api_key=api_key,
                     api_base=api_base,
@@ -1156,7 +1015,7 @@ class OpenAIChatCompletion(BaseLLM):
         max_retries=None,
         headers=None,
     ):
-        openai_client = self._get_openai_client(
+        openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
             api_key=api_key,
             api_base=api_base,
@@ -1210,7 +1069,7 @@ class OpenAIChatCompletion(BaseLLM):
         response = None
         for _ in range(2):
             try:
-                openai_aclient = self._get_openai_client(
+                openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                     is_async=True,
                     api_key=api_key,
                     api_base=api_base,
@@ -1282,7 +1141,7 @@ class OpenAIChatCompletion(BaseLLM):
                 error_headers = getattr(e, "headers", None)
                 raise OpenAIError(
                     status_code=500,
-                    message=f"{str(e)}\n\nOriginal Response: {response.text}",
+                    message=f"{str(e)}\n\nOriginal Response: {response.text}",  # type: ignore
                     headers=error_headers,
                 )
             else:
@@ -1294,7 +1153,7 @@ class OpenAIChatCompletion(BaseLLM):
                 )
             elif hasattr(e, "status_code"):
                 raise OpenAIError(
-                    status_code=e.status_code,
+                    status_code=getattr(e, "status_code", 500),
                     message=str(e),
                     headers=error_headers,
                 )
@@ -1361,7 +1220,7 @@ class OpenAIChatCompletion(BaseLLM):
     ):
         response = None
         try:
-            openai_aclient = self._get_openai_client(
+            openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                 is_async=True,
                 api_key=api_key,
                 api_base=api_base,
@@ -1410,16 +1269,16 @@ class OpenAIChatCompletion(BaseLLM):
                 status_code=status_code, message=str(e), headers=error_headers
             )

-    def embedding(
+    def embedding(  # type: ignore
         self,
         model: str,
         input: list,
         timeout: float,
         logging_obj,
         model_response: litellm.utils.EmbeddingResponse,
+        optional_params: dict,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
-        optional_params=None,
         client=None,
         aembedding=None,
     ):
@@ -1452,7 +1311,7 @@ class OpenAIChatCompletion(BaseLLM):
             )
             return response

-        openai_client = self._get_openai_client(
+        openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
             api_key=api_key,
             api_base=api_base,
@@ -1496,11 +1355,11 @@ class OpenAIChatCompletion(BaseLLM):
         data: dict,
         model_response: ModelResponse,
         timeout: float,
+        logging_obj: Any,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         client=None,
         max_retries=None,
-        logging_obj=None,
     ):
         response = None
         try:
@@ -1538,15 +1397,16 @@ class OpenAIChatCompletion(BaseLLM):
         model: Optional[str],
         prompt: str,
         timeout: float,
+        optional_params: dict,
+        logging_obj: Any,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
         model_response: Optional[litellm.utils.ImageResponse] = None,
-        logging_obj=None,
-        optional_params=None,
         client=None,
         aimg_generation=None,
     ):
         exception_mapping_worked = False
+        data = {}
         try:
             model = model
             data = {"model": model, "prompt": prompt, **optional_params}
@@ -1611,7 +1471,9 @@ class OpenAIChatCompletion(BaseLLM):
                 original_response=str(e),
             )
             if hasattr(e, "status_code"):
-                raise OpenAIError(status_code=e.status_code, message=str(e))
+                raise OpenAIError(
+                    status_code=getattr(e, "status_code", 500), message=str(e)
+                )
             else:
                 raise OpenAIError(status_code=500, message=str(e))

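This hunk, like the elif branch changed earlier in the commit, swaps direct e.status_code access for getattr(e, "status_code", 500), so the type checker is satisfied and a missing attribute cannot mask the original failure. A minimal self-contained illustration of the pattern:

```python
# Minimal illustration of the getattr-with-default pattern used in the hunks above.
class ProviderError(Exception):
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)


def to_http_status(e: Exception) -> int:
    # the exception may or may not carry a status_code; default to 500 instead of raising
    return getattr(e, "status_code", 500)


assert to_http_status(ProviderError(429, "rate limited")) == 429
assert to_http_status(ValueError("boom")) == 500
```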
@@ -1661,7 +1523,7 @@ class OpenAIChatCompletion(BaseLLM):
             input=input,
             **optional_params,
         )
-        return response
+        return response  # type: ignore

     async def async_audio_speech(
         self,
@@ -1784,11 +1646,8 @@ class OpenAIChatCompletion(BaseLLM):


 class OpenAITextCompletion(BaseLLM):
-    _client_session: httpx.Client
-
     def __init__(self) -> None:
         super().__init__()
-        self._client_session = self.create_client_session()

     def validate_environment(self, api_key):
         headers = {
@@ -1806,10 +1665,10 @@ class OpenAITextCompletion(BaseLLM):
         messages: list,
         timeout: float,
+        logging_obj: LiteLLMLoggingObj,
+        optional_params: dict,
         print_verbose: Optional[Callable] = None,
         api_base: Optional[str] = None,
         acompletion: bool = False,
-        optional_params=None,
         litellm_params=None,
         logger_fn=None,
         client=None,
@@ -1921,7 +1780,7 @@ class OpenAITextCompletion(BaseLLM):
         api_key: str,
         model: str,
         timeout: float,
-        max_retries=None,
+        max_retries: int,
         organization: Optional[str] = None,
         client=None,
     ):
@@ -2017,9 +1876,9 @@ class OpenAITextCompletion(BaseLLM):
         model_response: ModelResponse,
         model: str,
         timeout: float,
+        max_retries: int,
         api_base: Optional[str] = None,
         client=None,
-        max_retries=None,
         organization=None,
     ):
         if client is None: