Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 19:54:13 +00:00)
fix(health.md): add rerank model health check information (#7295)
* fix(health.md): add rerank model health check information
* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits
* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true
* docs(team_model_add.md): clarify that allowing teams to add models is an enterprise feature
* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1, and the 'stream_options' param for o1-mini
* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models, needed as the o1-preview and o1-mini models don't support 'system message'
* fix(o1_transformation.py): translate system message based on whether the o1 model supports it
* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview. o1 currently doesn't support streaming, but the other model versions do. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(o1_transformation.py): return tool calling/response_format in supported params if the model map says so. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix: fix linting errors
* fix: update '_transform_messages'
* fix(o1_transformation.py): fix provider passed for supported param checks
* test(base_llm_unit_tests.py): skip test if api takes >5s to respond
* fix(utils.py): return false in 'supports_factory' if the value can't be found
* fix(o1_transformation.py): always return stream + stream_options as supported params, and handle stream options being passed in for azure o1
* feat(openai.py): support stream faking natively in the openai handler. Allows o1 calls to be fake-streamed for just the "o1" model, while o1-mini and o1-preview stream natively. Fixes https://github.com/BerriAI/litellm/issues/7292
* fix(openai.py): use the inference param instead of the original optional param
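A quick illustration of how the streaming change above surfaces to a caller (a minimal sketch; model names are examples, and the chunk format follows litellm's OpenAI-compatible streaming API):

import litellm

# After this change, the same streaming call works across the o1 family:
# o1-mini and o1-preview stream natively, while plain "o1" is fake-streamed
# by litellm (the full response is chunked client-side).
response = litellm.completion(
    model="o1-mini",  # or "o1-preview", or "o1" (fake-streamed)
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
)
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")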
This commit is contained in:
parent e95820367f
commit 1a4910f6c0
34 changed files with 800 additions and 515 deletions
Diff excerpt (o1_transformation.py, 1 of the 34 changed files):

@@ -15,7 +15,14 @@ import types
 from typing import Any, List, Optional, Union
 
 import litellm
+from litellm import verbose_logger
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
+from litellm.utils import (
+    supports_function_calling,
+    supports_response_schema,
+    supports_system_messages,
+)
 
 from .gpt_transformation import OpenAIGPTConfig
@@ -29,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        supported_stream_models = ["o1-mini", "o1-preview"]
+        for supported_model in supported_stream_models:
+            if supported_model in model:
+                return False
+        return True
+
     def get_supported_openai_params(self, model: str) -> list:
         """
         Get the supported OpenAI params for the given model
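The new should_fake_stream hook is a plain substring check on the model name. A hypothetical illustration of its behavior (assuming OpenAIO1Config is imported from this module):

# Substring match decides whether litellm must fake the stream for a model.
cfg = OpenAIO1Config()
assert cfg.should_fake_stream(model="o1") is True              # no native streaming
assert cfg.should_fake_stream(model="o1-mini") is False        # streams natively
assert cfg.should_fake_stream(model="azure/o1-preview") is False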
@@ -38,21 +54,37 @@ class OpenAIO1Config(OpenAIGPTConfig):
         all_openai_params = super().get_supported_openai_params(model=model)
         non_supported_params = [
             "logprobs",
-            "tools",
-            "tool_choice",
-            "parallel_tool_calls",
-            "function_call",
-            "functions",
             "top_p",
-            "n",
             "presence_penalty",
             "frequency_penalty",
             "top_logprobs",
-            "response_format",
-            "stop",
-            "stream_options",
         ]
 
+        try:
+            model, custom_llm_provider, api_base, api_key = get_llm_provider(
+                model=model
+            )
+        except Exception:
+            verbose_logger.debug(
+                f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
+            )
+            custom_llm_provider = "openai"
+
+        _supports_function_calling = supports_function_calling(
+            model, custom_llm_provider
+        )
+        _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+
+        if not _supports_function_calling:
+            non_supported_params.append("tools")
+            non_supported_params.append("tool_choice")
+            non_supported_params.append("parallel_tool_calls")
+            non_supported_params.append("function_call")
+            non_supported_params.append("functions")
+
+        if not _supports_response_schema:
+            non_supported_params.append("response_format")
+
         return [
             param for param in all_openai_params if param not in non_supported_params
         ]
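With this hunk the supported-params list is capability-driven rather than hard-coded: tool-calling and response_format params are dropped only when the model map says the model lacks them. A sketch of the observable effect, using litellm's public helper (assuming the default model map entries for o1-mini):

from litellm import get_supported_openai_params

params = get_supported_openai_params(model="o1-mini", custom_llm_provider="openai")
print("logprobs" in params)  # False: always filtered out for o1 models
print("tools" in params)     # True only if the model map marks o1-mini as
                             # supporting function calling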
@@ -95,16 +127,16 @@ class OpenAIO1Config(OpenAIGPTConfig):
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
    ) -> List[AllMessageValues]:
         """
         Handles limitations of O-1 model family.
         - modalities: image => drop param (if user opts in to dropping param)
         - role: system ==> translate to role 'user'
         """
-
+        _supports_system_messages = supports_system_messages(model, "openai")
         for i, message in enumerate(messages):
-            if message["role"] == "system":
+            if message["role"] == "system" and not _supports_system_messages:
                 new_message = ChatCompletionUserMessage(
                     content=message["content"], role="user"
                 )
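The system-message change is easiest to see in isolation. A minimal re-implementation of the loop above (illustrative only, not litellm's API; the real code reads the support flag from litellm's model map via supports_system_messages):

from typing import Dict, List

def translate_system_messages(
    messages: List[Dict[str, str]], supports_system: bool
) -> List[Dict[str, str]]:
    # Mirrors the diff: rewrite system messages as user messages only when
    # the model cannot accept a system role.
    if supports_system:
        return messages
    return [
        {"role": "user", "content": m["content"]} if m["role"] == "system" else m
        for m in messages
    ]

# For o1-preview-style models without system-message support:
print(translate_system_messages(
    [{"role": "system", "content": "Be terse."}, {"role": "user", "content": "Hi"}],
    supports_system=False,
))
# -> [{'role': 'user', 'content': 'Be terse.'}, {'role': 'user', 'content': 'Hi'}]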