fix(o_series_transformation.py): fix optional param check for o-series models (#8787)

* fix(o_series_transformation.py): fix optional param check for o-series models

o3-mini and o1 do not support parallel tool calling

* fix(utils.py): support 'drop_params' for 'thinking' param across models

allows switching to older claude versions (or non-anthropic models) with the param safely dropped

* fix: fix passing thinking param in optional params

allows dropping the thinking param where it is not applicable

* test: update old model

* fix(utils.py): fix linting errors

* fix(main.py): add param to acompletion
Krish Dholakia 2025-02-26 12:26:55 -08:00 committed by GitHub
parent aabb5c0df4
commit 017c482d7b
11 changed files with 87 additions and 31 deletions
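
Taken together, the intended usage after this change looks roughly like the minimal sketch below: the new thinking parameter is forwarded to models that support it and, with drop_params=True, dropped elsewhere instead of erroring. The sketch is not part of the diff; it assumes an Anthropic API key is configured, and the model names and thinking dict mirror the tests added further down.

import litellm

# Minimal sketch (not from the diff): assumes ANTHROPIC_API_KEY is set.
# claude-3-7-sonnet supports the thinking param, so it is passed through.
litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

# Older claude versions (and non-anthropic models) do not support thinking;
# with drop_params=True the param is dropped instead of raising an error.
litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    drop_params=True,
)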

View file

@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
"top_logprobs",
"reasoning_effort",
"extra_headers",
"thinking",
]
openai_compatible_endpoints: List = [

View file

@@ -80,7 +80,7 @@ class AnthropicConfig(BaseConfig):
return super().get_config()
def get_supported_openai_params(self, model: str):
return [
params = [
"stream",
"stop",
"temperature",
@@ -95,6 +95,11 @@ class AnthropicConfig(BaseConfig):
"user",
]
if "claude-3-7-sonnet" in model:
params.append("thinking")
return params
def get_json_schema_from_pydantic_object(
self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
@@ -302,6 +307,7 @@ class AnthropicConfig(BaseConfig):
model: str,
drop_params: bool,
) -> dict:
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
@@ -358,7 +364,8 @@ class AnthropicConfig(BaseConfig):
optional_params["json_mode"] = True
if param == "user":
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
return optional_params
def _create_json_tool_call_for_response_format(
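
In effect, the list of supported OpenAI params is now model-dependent for Anthropic. A small sketch of how this surfaces (the top-level AnthropicConfig import path is an assumption; the method is the one shown in the diff above):

from litellm import AnthropicConfig  # import path assumed

config = AnthropicConfig()
# "thinking" is only advertised for claude-3-7-sonnet models.
assert "thinking" in config.get_supported_openai_params(model="claude-3-7-sonnet-20250219")
assert "thinking" not in config.get_supported_openai_params(model="claude-3-5-sonnet-20240620")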

View file

@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
from litellm.utils import (
supports_function_calling,
supports_parallel_function_calling,
supports_response_schema,
supports_system_messages,
)
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
model, custom_llm_provider
)
_supports_response_schema = supports_response_schema(model, custom_llm_provider)
_supports_parallel_tool_calls = supports_parallel_function_calling(
model, custom_llm_provider
)
if not _supports_function_calling:
non_supported_params.append("tools")
non_supported_params.append("tool_choice")
non_supported_params.append("parallel_tool_calls")
non_supported_params.append("function_call")
non_supported_params.append("functions")
if not _supports_parallel_tool_calls:
non_supported_params.append("parallel_tool_calls")
if not _supports_response_schema:
non_supported_params.append("response_format")

View file

@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
from .llms.vllm.completion import handler as vllm_handler
from .llms.watsonx.chat.handler import WatsonXChatHandler
from .llms.watsonx.common_utils import IBMWatsonXMixin
from .types.llms.anthropic import AnthropicThinkingParam
from .types.llms.openai import (
ChatCompletionAssistantMessage,
ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
extra_headers: Optional[dict] = None,
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@@ -800,6 +802,7 @@ def completion( # type: ignore # noqa: PLR0915
api_key: Optional[str] = None,
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@@ -1106,6 +1109,7 @@ def completion( # type: ignore # noqa: PLR0915
parallel_tool_calls=parallel_tool_calls,
messages=messages,
reasoning_effort=reasoning_effort,
thinking=thinking,
**non_default_params,
)

View file

@@ -359,3 +359,8 @@ ANTHROPIC_API_HEADERS = {
ANTHROPIC_API_ONLY_HEADERS = { # fails if calling anthropic on vertex ai / bedrock
"anthropic-beta",
}
class AnthropicThinkingParam(TypedDict, total=False):
type: Literal["enabled"]
budget_tokens: int
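
For reference, a value of this type is the dict the new tests pass as the thinking argument; the import path matches the one used in utils.py below.

from litellm.types.llms.anthropic import AnthropicThinkingParam

# Example value matching the TypedDict above; same shape as used in the tests below.
thinking_param: AnthropicThinkingParam = {"type": "enabled", "budget_tokens": 1024}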

View file

@@ -383,6 +383,7 @@ class ChatCompletionImageObject(TypedDict):
type: Literal["image_url"]
image_url: Union[str, ChatCompletionImageUrlObject]
class ChatCompletionVideoUrlObject(TypedDict, total=False):
url: Required[str]
detail: str

View file

@@ -119,7 +119,10 @@ from litellm.router_utils.get_retry_from_policy import (
reset_retry_policy,
)
from litellm.secret_managers.main import get_secret
from litellm.types.llms.anthropic import ANTHROPIC_API_ONLY_HEADERS
from litellm.types.llms.anthropic import (
ANTHROPIC_API_ONLY_HEADERS,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
AllMessageValues,
AllPromptValues,
@@ -1969,6 +1972,19 @@ def supports_response_schema(
)
def supports_parallel_function_calling(
model: str, custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if the given model supports parallel tool calls and return a boolean value.
"""
return _supports_factory(
model=model,
custom_llm_provider=custom_llm_provider,
key="supports_parallel_function_calling",
)
def supports_function_calling(
model: str, custom_llm_provider: Optional[str] = None
) -> bool:
@@ -2118,30 +2134,6 @@ def supports_embedding_image_input(
)
def supports_parallel_function_calling(model: str):
"""
Check if the given model supports parallel function calling and return True if it does, False otherwise.
Parameters:
model (str): The model to check for support of parallel function calling.
Returns:
bool: True if the model supports parallel function calling, False otherwise.
Raises:
Exception: If the model is not found in the model_cost dictionary.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_parallel_function_calling", False) is True:
return True
return False
else:
raise Exception(
f"Model not supports parallel function calling. You passed model={model}."
)
####### HELPER FUNCTIONS ################
def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
for k, v in new_dict.items():
@@ -2752,6 +2744,7 @@ def get_optional_params( # noqa: PLR0915
reasoning_effort=None,
additional_drop_params=None,
messages: Optional[List[AllMessageValues]] = None,
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
):
# retrieve all parameters passed to the function
@@ -2836,9 +2829,11 @@ def get_optional_params( # noqa: PLR0915
"additional_drop_params": None,
"messages": None,
"reasoning_effort": None,
"thinking": None,
}
# filter out those parameters that were passed with non-default values
non_default_params = {
k: v
for k, v in passed_params.items()
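
Net effect in utils.py: supports_parallel_function_calling is now provider-aware and goes through the shared capability lookup, replacing the bespoke model_cost check removed earlier in this file. A rough usage sketch (model names are illustrative; return values depend on litellm's model capability map):

from litellm.utils import supports_parallel_function_calling

# Provider-aware capability check backed by the shared supports_* lookup.
# Expected to be False for o-series models per this commit; the second call is
# illustrative and depends on the capability map entry for the model.
print(supports_parallel_function_calling(model="o3-mini", custom_llm_provider="openai"))
print(supports_parallel_function_calling(model="gpt-4o", custom_llm_provider="openai"))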

View file

@@ -1166,6 +1166,8 @@ def test_anthropic_citations_api_streaming():
def test_anthropic_thinking_output():
from litellm import completion
litellm._turn_on_debug()
resp = completion(
model="anthropic/claude-3-7-sonnet-20250219",
messages=[{"role": "user", "content": "What is the capital of France?"}],

View file

@@ -338,3 +338,18 @@ def test_openai_max_retries_0(mock_get_openai_client):
mock_get_openai_client.assert_called_once()
assert mock_get_openai_client.call_args.kwargs["max_retries"] == 0
@pytest.mark.parametrize("model", ["o1", "o1-preview", "o1-mini", "o3-mini"])
def test_o1_parallel_tool_calls(model):
litellm.completion(
model=model,
messages=[
{
"role": "user",
"content": "foo",
}
],
parallel_tool_calls=True,
drop_params=True,
)

View file

@@ -1069,7 +1069,6 @@ def test_gemini_frequency_penalty():
assert optional_params["frequency_penalty"] == 0.5
def test_azure_prediction_param():
optional_params = get_optional_params(
model="chatgpt-v2",
@@ -1084,6 +1083,7 @@ def test_azure_prediction_param():
"content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
}
def test_vertex_ai_ft_llama():
optional_params = get_optional_params(
model="1984786713414729728",
@@ -1093,3 +1093,24 @@ def test_vertex_ai_ft_llama():
)
assert optional_params["frequency_penalty"] == 0.5
assert "max_retries" not in optional_params
@pytest.mark.parametrize(
"model, expected_thinking",
[
("claude-3-5-sonnet", False),
("claude-3-7-sonnet", True),
("gpt-3.5-turbo", False),
],
)
def test_anthropic_thinking_param(model, expected_thinking):
optional_params = get_optional_params(
model=model,
custom_llm_provider="anthropic",
thinking={"type": "enabled", "budget_tokens": 1024},
drop_params=True,
)
if expected_thinking:
assert "thinking" in optional_params
else:
assert "thinking" not in optional_params

View file

@@ -4072,7 +4072,7 @@ def test_mock_response_iterator_tool_use():
"anthropic/claude-3-7-sonnet-20250219",
],
)
def test_deepseek_reasoning_content_completion(model):
def test_reasoning_content_completion(model):
# litellm.set_verbose = True
try:
# litellm._turn_on_debug()
@@ -4081,7 +4081,6 @@ def test_deepseek_reasoning_content_completion(model):
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
thinking={"type": "enabled", "budget_tokens": 1024},
timeout=5,
)
reasoning_content_exists = False