mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
fix(o_series_transformation.py): fix optional param check for o-series models (#8787)
* fix(o_series_transformation.py): fix optional param check for o-series models. o3-mini and o1 do not support parallel tool calling.
* fix(utils.py): support 'drop_params' for the 'thinking' param across models. Allows switching to older claude versions (or non-anthropic models) with the param safely dropped.
* fix: fix passing the thinking param in optional params. Allows dropping the thinking param where not applicable.
* test: update old model
* fix(utils.py): fix linting errors
* fix(main.py): add param to acompletion
This commit is contained in:
parent aabb5c0df4
commit 017c482d7b
11 changed files with 87 additions and 31 deletions
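In practice, the 'thinking' fixes mean a request like the following (a usage sketch; the older model name is illustrative) keeps working when the target model lacks extended thinking:

    import litellm

    # 'thinking' is only mapped for claude-3-7-sonnet; with drop_params=True the
    # same call still works after switching back to an older claude model
    resp = litellm.completion(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        thinking={"type": "enabled", "budget_tokens": 1024},
        drop_params=True,
    )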
@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
     "top_logprobs",
     "reasoning_effort",
     "extra_headers",
+    "thinking",
 ]

 openai_compatible_endpoints: List = [
@@ -80,7 +80,7 @@ class AnthropicConfig(BaseConfig):
         return super().get_config()

     def get_supported_openai_params(self, model: str):
-        return [
+        params = [
             "stream",
             "stop",
             "temperature",
@@ -95,6 +95,11 @@ class AnthropicConfig(BaseConfig):
             "user",
         ]
+
+        if "claude-3-7-sonnet" in model:
+            params.append("thinking")
+
+        return params

     def get_json_schema_from_pydantic_object(
         self, response_format: Union[Any, Dict, None]
     ) -> Optional[dict]:
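The effect of the new check is visible through litellm's public helper for this lookup; a quick sketch (model name is illustrative):

    from litellm import get_supported_openai_params

    params = get_supported_openai_params(
        model="claude-3-7-sonnet-20250219", custom_llm_provider="anthropic"
    )
    assert "thinking" in params  # only claude-3-7-sonnet models advertise it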
@@ -302,6 +307,7 @@ class AnthropicConfig(BaseConfig):
         model: str,
         drop_params: bool,
     ) -> dict:
+
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
@@ -358,7 +364,8 @@ class AnthropicConfig(BaseConfig):
                 optional_params["json_mode"] = True
             if param == "user":
                 optional_params["metadata"] = {"user_id": value}
-
+            if param == "thinking":
+                optional_params["thinking"] = value
         return optional_params

     def _create_json_tool_call_for_response_format(
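The mapping can be exercised directly; a minimal sketch, assuming the config class lives at litellm.llms.anthropic.chat.transformation and using keyword arguments so parameter order does not matter:

    from litellm.llms.anthropic.chat.transformation import AnthropicConfig

    # 'thinking' is passed through untouched for models that support it
    mapped = AnthropicConfig().map_openai_params(
        non_default_params={"thinking": {"type": "enabled", "budget_tokens": 1024}},
        optional_params={},
        model="claude-3-7-sonnet-20250219",
        drop_params=False,
    )
    assert mapped["thinking"] == {"type": "enabled", "budget_tokens": 1024}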
@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
 from litellm.utils import (
     supports_function_calling,
+    supports_parallel_function_calling,
     supports_response_schema,
     supports_system_messages,
 )
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
             model, custom_llm_provider
         )
         _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+        _supports_parallel_tool_calls = supports_parallel_function_calling(
+            model, custom_llm_provider
+        )

         if not _supports_function_calling:
             non_supported_params.append("tools")
             non_supported_params.append("tool_choice")
             non_supported_params.append("parallel_tool_calls")
             non_supported_params.append("function_call")
             non_supported_params.append("functions")

+        if not _supports_parallel_tool_calls:
+            non_supported_params.append("parallel_tool_calls")
+
         if not _supports_response_schema:
             non_supported_params.append("response_format")
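With the separate parallel-tool-calls check, o-series calls can shed the unsupported param instead of erroring. A sketch of the user-facing behavior (it mirrors the new test further down; model availability is an assumption):

    import litellm

    # o3-mini supports tools but not parallel tool calls; drop_params=True
    # now strips parallel_tool_calls rather than sending it through
    litellm.completion(
        model="o3-mini",
        messages=[{"role": "user", "content": "foo"}],
        parallel_tool_calls=True,
        drop_params=True,
    )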
@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
 from .llms.vllm.completion import handler as vllm_handler
 from .llms.watsonx.chat.handler import WatsonXChatHandler
 from .llms.watsonx.common_utils import IBMWatsonXMixin
+from .types.llms.anthropic import AnthropicThinkingParam
 from .types.llms.openai import (
     ChatCompletionAssistantMessage,
     ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
     model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
     extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
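Since acompletion now accepts the param directly, async callers can pass it without relying on **kwargs. A minimal sketch, assuming an ANTHROPIC_API_KEY is configured:

    import asyncio

    import litellm

    async def main():
        resp = await litellm.acompletion(
            model="anthropic/claude-3-7-sonnet-20250219",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            thinking={"type": "enabled", "budget_tokens": 1024},
        )
        print(resp)

    asyncio.run(main())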
@@ -800,6 +802,7 @@ def completion( # type: ignore # noqa: PLR0915
     api_key: Optional[str] = None,
     model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -1106,6 +1109,7 @@ def completion( # type: ignore # noqa: PLR0915
             parallel_tool_calls=parallel_tool_calls,
             messages=messages,
             reasoning_effort=reasoning_effort,
+            thinking=thinking,
             **non_default_params,
         )
@@ -359,3 +359,8 @@ ANTHROPIC_API_HEADERS = {
 ANTHROPIC_API_ONLY_HEADERS = { # fails if calling anthropic on vertex ai / bedrock
     "anthropic-beta",
 }
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+    type: Literal["enabled"]
+    budget_tokens: int
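The new TypedDict gives the kwarg a shape that type checkers can verify; a usage sketch:

    from litellm.types.llms.anthropic import AnthropicThinkingParam

    # total=False makes both keys optional; the shape matches Anthropic's
    # extended-thinking payload {"type": "enabled", "budget_tokens": ...}
    thinking: AnthropicThinkingParam = {"type": "enabled", "budget_tokens": 1024}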
@@ -383,6 +383,7 @@ class ChatCompletionImageObject(TypedDict):
     type: Literal["image_url"]
     image_url: Union[str, ChatCompletionImageUrlObject]

+
 class ChatCompletionVideoUrlObject(TypedDict, total=False):
     url: Required[str]
     detail: str
@@ -119,7 +119,10 @@ from litellm.router_utils.get_retry_from_policy import (
     reset_retry_policy,
 )
 from litellm.secret_managers.main import get_secret
-from litellm.types.llms.anthropic import ANTHROPIC_API_ONLY_HEADERS
+from litellm.types.llms.anthropic import (
+    ANTHROPIC_API_ONLY_HEADERS,
+    AnthropicThinkingParam,
+)
 from litellm.types.llms.openai import (
     AllMessageValues,
     AllPromptValues,
@@ -1969,6 +1972,19 @@ def supports_response_schema(
     )


+def supports_parallel_function_calling(
+    model: str, custom_llm_provider: Optional[str] = None
+) -> bool:
+    """
+    Check if the given model supports parallel tool calls and return a boolean value.
+    """
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_parallel_function_calling",
+    )
+
+
 def supports_function_calling(
     model: str, custom_llm_provider: Optional[str] = None
 ) -> bool:
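The rewritten helper routes through _supports_factory like the other supports_* checks, so it is provider-aware and, unlike the removed variant below, returns a boolean instead of raising for models missing from litellm.model_cost. A usage sketch (the exact return values depend on litellm's model map, so treat them as assumptions):

    from litellm.utils import supports_parallel_function_calling

    supports_parallel_function_calling(model="o3-mini", custom_llm_provider="openai")  # expected: False
    supports_parallel_function_calling(model="gpt-4o", custom_llm_provider="openai")  # expected: True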
@@ -2118,30 +2134,6 @@ def supports_embedding_image_input(
     )


-def supports_parallel_function_calling(model: str):
-    """
-    Check if the given model supports parallel function calling and return True if it does, False otherwise.
-
-    Parameters:
-    model (str): The model to check for support of parallel function calling.
-
-    Returns:
-    bool: True if the model supports parallel function calling, False otherwise.
-
-    Raises:
-    Exception: If the model is not found in the model_cost dictionary.
-    """
-    if model in litellm.model_cost:
-        model_info = litellm.model_cost[model]
-        if model_info.get("supports_parallel_function_calling", False) is True:
-            return True
-        return False
-    else:
-        raise Exception(
-            f"Model not supports parallel function calling. You passed model={model}."
-        )
-
-
 ####### HELPER FUNCTIONS ################
 def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
     for k, v in new_dict.items():
@@ -2752,6 +2744,7 @@ def get_optional_params( # noqa: PLR0915
     reasoning_effort=None,
     additional_drop_params=None,
     messages: Optional[List[AllMessageValues]] = None,
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ):
     # retrieve all parameters passed to the function
@@ -2836,9 +2829,11 @@
         "additional_drop_params": None,
         "messages": None,
         "reasoning_effort": None,
+        "thinking": None,
     }
+
     # filter out those parameters that were passed with non-default values

     non_default_params = {
         k: v
         for k, v in passed_params.items()
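The "thinking": None default matters because get_optional_params only forwards values that differ from their defaults. A simplified sketch of that filtering idea (the real function compares many more params):

    default_params = {"reasoning_effort": None, "thinking": None}
    passed_params = {
        "reasoning_effort": None,
        "thinking": {"type": "enabled", "budget_tokens": 1024},
    }

    # only non-default values survive the filter
    non_default_params = {
        k: v for k, v in passed_params.items() if v != default_params.get(k)
    }
    assert non_default_params == {"thinking": {"type": "enabled", "budget_tokens": 1024}}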
@@ -1166,6 +1166,8 @@ def test_anthropic_citations_api_streaming():
 def test_anthropic_thinking_output():
     from litellm import completion

+    litellm._turn_on_debug()
+
     resp = completion(
         model="anthropic/claude-3-7-sonnet-20250219",
         messages=[{"role": "user", "content": "What is the capital of France?"}],
@@ -338,3 +338,18 @@ def test_openai_max_retries_0(mock_get_openai_client):

     mock_get_openai_client.assert_called_once()
     assert mock_get_openai_client.call_args.kwargs["max_retries"] == 0
+
+
+@pytest.mark.parametrize("model", ["o1", "o1-preview", "o1-mini", "o3-mini"])
+def test_o1_parallel_tool_calls(model):
+    litellm.completion(
+        model=model,
+        messages=[
+            {
+                "role": "user",
+                "content": "foo",
+            }
+        ],
+        parallel_tool_calls=True,
+        drop_params=True,
+    )
@@ -1069,7 +1069,6 @@ def test_gemini_frequency_penalty():
     assert optional_params["frequency_penalty"] == 0.5


-
 def test_azure_prediction_param():
     optional_params = get_optional_params(
         model="chatgpt-v2",
@@ -1084,6 +1083,7 @@ def test_azure_prediction_param():
         "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
     }

+
 def test_vertex_ai_ft_llama():
     optional_params = get_optional_params(
         model="1984786713414729728",
@@ -1093,3 +1093,24 @@ def test_vertex_ai_ft_llama():
     )
     assert optional_params["frequency_penalty"] == 0.5
     assert "max_retries" not in optional_params
+
+
+@pytest.mark.parametrize(
+    "model, expected_thinking",
+    [
+        ("claude-3-5-sonnet", False),
+        ("claude-3-7-sonnet", True),
+        ("gpt-3.5-turbo", False),
+    ],
+)
+def test_anthropic_thinking_param(model, expected_thinking):
+    optional_params = get_optional_params(
+        model=model,
+        custom_llm_provider="anthropic",
+        thinking={"type": "enabled", "budget_tokens": 1024},
+        drop_params=True,
+    )
+    if expected_thinking:
+        assert "thinking" in optional_params
+    else:
+        assert "thinking" not in optional_params
@@ -4072,7 +4072,7 @@ def test_mock_response_iterator_tool_use():
         "anthropic/claude-3-7-sonnet-20250219",
     ],
 )
-def test_deepseek_reasoning_content_completion(model):
+def test_reasoning_content_completion(model):
     # litellm.set_verbose = True
     try:
         # litellm._turn_on_debug()
@@ -4081,7 +4081,6 @@ def test_deepseek_reasoning_content_completion(model):
         messages=[{"role": "user", "content": "Tell me a joke."}],
         stream=True,
         thinking={"type": "enabled", "budget_tokens": 1024},
-        timeout=5,
     )

     reasoning_content_exists = False