fix(o_series_transformation.py): fix optional param check for o-series models (#8787)

* fix(o_series_transformation.py): fix optional param check for o-series models

o3-mini and o1 do not support parallel tool calling

* fix(utils.py): support 'drop_params' for 'thinking' param across models

allows switching to older claude versions (or non-anthropic models) with the param safely dropped

* fix: fix passing thinking param in optional params

allows dropping the thinking param where it is not applicable

* test: update old model

* fix(utils.py): fix linting errors

* fix(main.py): add param to acompletion
Krish Dholakia 2025-02-26 12:26:55 -08:00 committed by GitHub
parent aabb5c0df4
commit 017c482d7b
11 changed files with 87 additions and 31 deletions
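
Taken together, the intended usage after this change looks roughly like the minimal sketch below: the new thinking parameter is forwarded to models that support it and, with drop_params=True, dropped elsewhere instead of erroring. The sketch is not part of the diff; it assumes an Anthropic API key is configured, and the model names and thinking dict mirror the tests added further down.

import litellm

# Minimal sketch (not from the diff): assumes ANTHROPIC_API_KEY is set.
# claude-3-7-sonnet supports the thinking param, so it is passed through.
litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

# Older claude versions (and non-anthropic models) do not support thinking;
# with drop_params=True the param is dropped instead of raising an error.
litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    drop_params=True,
)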

View file

@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
"top_logprobs",
"reasoning_effort",
"extra_headers",
"thinking",
]
openai_compatible_endpoints: List = [

View file

@@ -80,7 +80,7 @@ class AnthropicConfig(BaseConfig):
return super().get_config()
def get_supported_openai_params(self, model: str):
return [
params = [
"stream",
"stop",
"temperature",
@@ -95,6 +95,11 @@ class AnthropicConfig(BaseConfig):
"user",
]
if "claude-3-7-sonnet" in model:
params.append("thinking")
return params
def get_json_schema_from_pydantic_object(
self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
@@ -302,6 +307,7 @@ class AnthropicConfig(BaseConfig):
model: str,
drop_params: bool,
) -> dict:
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
@@ -358,7 +364,8 @@ class AnthropicConfig(BaseConfig):
optional_params["json_mode"] = True
if param == "user":
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
return optional_params
def _create_json_tool_call_for_response_format(
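
In effect, the list of supported OpenAI params is now model-dependent for Anthropic. A small sketch of how this surfaces (the top-level AnthropicConfig import path is an assumption; the method is the one shown in the diff above):

from litellm import AnthropicConfig  # import path assumed

config = AnthropicConfig()
# "thinking" is only advertised for claude-3-7-sonnet models.
assert "thinking" in config.get_supported_openai_params(model="claude-3-7-sonnet-20250219")
assert "thinking" not in config.get_supported_openai_params(model="claude-3-5-sonnet-20240620")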

View file

@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
from litellm.utils import (
supports_function_calling,
supports_parallel_function_calling,
supports_response_schema,
supports_system_messages,
)
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
model, custom_llm_provider
)
_supports_response_schema = supports_response_schema(model, custom_llm_provider)
_supports_parallel_tool_calls = supports_parallel_function_calling(
model, custom_llm_provider
)
if not _supports_function_calling:
non_supported_params.append("tools")
non_supported_params.append("tool_choice")
non_supported_params.append("parallel_tool_calls")
non_supported_params.append("function_call")
non_supported_params.append("functions")
if not _supports_parallel_tool_calls:
non_supported_params.append("parallel_tool_calls")
if not _supports_response_schema:
non_supported_params.append("response_format")

View file

@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
from .llms.vllm.completion import handler as vllm_handler
from .llms.watsonx.chat.handler import WatsonXChatHandler
from .llms.watsonx.common_utils import IBMWatsonXMixin
from .types.llms.anthropic import AnthropicThinkingParam
from .types.llms.openai import (
ChatCompletionAssistantMessage,
ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
extra_headers: Optional[dict] = None,
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@@ -800,6 +802,7 @@ def completion( # type: ignore # noqa: PLR0915
api_key: Optional[str] = None,
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@@ -1106,6 +1109,7 @@ def completion( # type: ignore # noqa: PLR0915
parallel_tool_calls=parallel_tool_calls,
messages=messages,
reasoning_effort=reasoning_effort,
thinking=thinking,
**non_default_params,
)

View file

@@ -359,3 +359,8 @@ ANTHROPIC_API_HEADERS = {
ANTHROPIC_API_ONLY_HEADERS = { # fails if calling anthropic on vertex ai / bedrock
"anthropic-beta",
}
class AnthropicThinkingParam(TypedDict, total=False):
type: Literal["enabled"]
budget_tokens: int
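
For reference, a value of this type is the dict the new tests pass as the thinking argument; the import path matches the one used in utils.py below.

from litellm.types.llms.anthropic import AnthropicThinkingParam

# Example value matching the TypedDict above; same shape as used in the tests below.
thinking_param: AnthropicThinkingParam = {"type": "enabled", "budget_tokens": 1024}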

View file

@@ -383,6 +383,7 @@ class ChatCompletionImageObject(TypedDict):
type: Literal["image_url"]
image_url: Union[str, ChatCompletionImageUrlObject]
class ChatCompletionVideoUrlObject(TypedDict, total=False):
url: Required[str]
detail: str

View file

@@ -119,7 +119,10 @@ from litellm.router_utils.get_retry_from_policy import (
reset_retry_policy,
)
from litellm.secret_managers.main import get_secret
from litellm.types.llms.anthropic import ANTHROPIC_API_ONLY_HEADERS
from litellm.types.llms.anthropic import (
ANTHROPIC_API_ONLY_HEADERS,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
AllMessageValues,
AllPromptValues,
@@ -1969,6 +1972,19 @@ def supports_response_schema(
)
def supports_parallel_function_calling(
model: str, custom_llm_provider: Optional[str] = None
) -> bool:
"""
Check if the given model supports parallel tool calls and return a boolean value.
"""
return _supports_factory(
model=model,
custom_llm_provider=custom_llm_provider,
key="supports_parallel_function_calling",
)
def supports_function_calling(
model: str, custom_llm_provider: Optional[str] = None
) -> bool:
@@ -2118,30 +2134,6 @@ def supports_embedding_image_input(
)
def supports_parallel_function_calling(model: str):
"""
Check if the given model supports parallel function calling and return True if it does, False otherwise.
Parameters:
model (str): The model to check for support of parallel function calling.
Returns:
bool: True if the model supports parallel function calling, False otherwise.
Raises:
Exception: If the model is not found in the model_cost dictionary.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_parallel_function_calling", False) is True:
return True
return False
else:
raise Exception(
f"Model not supports parallel function calling. You passed model={model}."
)
####### HELPER FUNCTIONS ################
def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
for k, v in new_dict.items():
@@ -2752,6 +2744,7 @@ def get_optional_params( # noqa: PLR0915
reasoning_effort=None,
additional_drop_params=None,
messages: Optional[List[AllMessageValues]] = None,
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
):
# retrieve all parameters passed to the function
@@ -2836,9 +2829,11 @@ def get_optional_params( # noqa: PLR0915
"additional_drop_params": None,
"messages": None,
"reasoning_effort": None,
"thinking": None,
}
# filter out those parameters that were passed with non-default values
non_default_params = {
k: v
for k, v in passed_params.items()
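
Net effect in utils.py: supports_parallel_function_calling is now provider-aware and goes through the shared capability lookup, replacing the bespoke model_cost check removed earlier in this file. A rough usage sketch (model names are illustrative; return values depend on litellm's model capability map):

from litellm.utils import supports_parallel_function_calling

# Provider-aware capability check backed by the shared supports_* lookup.
# Expected to be False for o-series models per this commit; the second call is
# illustrative and depends on the capability map entry for the model.
print(supports_parallel_function_calling(model="o3-mini", custom_llm_provider="openai"))
print(supports_parallel_function_calling(model="gpt-4o", custom_llm_provider="openai"))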

View file

@@ -1166,6 +1166,8 @@ def test_anthropic_citations_api_streaming():
def test_anthropic_thinking_output():
from litellm import completion
litellm._turn_on_debug()
resp = completion(
model="anthropic/claude-3-7-sonnet-20250219",
messages=[{"role": "user", "content": "What is the capital of France?"}],

View file

@@ -338,3 +338,18 @@ def test_openai_max_retries_0(mock_get_openai_client):
mock_get_openai_client.assert_called_once()
assert mock_get_openai_client.call_args.kwargs["max_retries"] == 0
@pytest.mark.parametrize("model", ["o1", "o1-preview", "o1-mini", "o3-mini"])
def test_o1_parallel_tool_calls(model):
litellm.completion(
model=model,
messages=[
{
"role": "user",
"content": "foo",
}
],
parallel_tool_calls=True,
drop_params=True,
)

View file

@@ -1069,7 +1069,6 @@ def test_gemini_frequency_penalty():
assert optional_params["frequency_penalty"] == 0.5
def test_azure_prediction_param():
optional_params = get_optional_params(
model="chatgpt-v2",
@@ -1084,6 +1083,7 @@ def test_azure_prediction_param():
"content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
}
def test_vertex_ai_ft_llama():
optional_params = get_optional_params(
model="1984786713414729728",
@@ -1093,3 +1093,24 @@ def test_vertex_ai_ft_llama():
)
assert optional_params["frequency_penalty"] == 0.5
assert "max_retries" not in optional_params
@pytest.mark.parametrize(
"model, expected_thinking",
[
("claude-3-5-sonnet", False),
("claude-3-7-sonnet", True),
("gpt-3.5-turbo", False),
],
)
def test_anthropic_thinking_param(model, expected_thinking):
optional_params = get_optional_params(
model=model,
custom_llm_provider="anthropic",
thinking={"type": "enabled", "budget_tokens": 1024},
drop_params=True,
)
if expected_thinking:
assert "thinking" in optional_params
else:
assert "thinking" not in optional_params

View file

@@ -4072,7 +4072,7 @@ def test_mock_response_iterator_tool_use():
"anthropic/claude-3-7-sonnet-20250219",
],
)
def test_deepseek_reasoning_content_completion(model):
def test_reasoning_content_completion(model):
# litellm.set_verbose = True
try:
# litellm._turn_on_debug()
@@ -4081,7 +4081,6 @@ def test_deepseek_reasoning_content_completion(model):
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
thinking={"type": "enabled", "budget_tokens": 1024},
timeout=5,
)
reasoning_content_exists = False