diff --git a/litellm/constants.py b/litellm/constants.py
index a1f7750d53..06756b8f20 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
     "top_logprobs",
     "reasoning_effort",
     "extra_headers",
+    "thinking",
 ]
 
 openai_compatible_endpoints: List = [
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index 580b65f77f..6c56acc4da 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -80,7 +80,7 @@ class AnthropicConfig(BaseConfig):
         return super().get_config()
 
     def get_supported_openai_params(self, model: str):
-        return [
+        params = [
             "stream",
             "stop",
             "temperature",
@@ -95,6 +95,11 @@ class AnthropicConfig(BaseConfig):
             "user",
         ]
 
+        if "claude-3-7-sonnet" in model:
+            params.append("thinking")
+
+        return params
+
     def get_json_schema_from_pydantic_object(
         self, response_format: Union[Any, Dict, None]
     ) -> Optional[dict]:
@@ -302,6 +307,7 @@ class AnthropicConfig(BaseConfig):
         model: str,
         drop_params: bool,
     ) -> dict:
+
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
@@ -358,7 +364,8 @@ class AnthropicConfig(BaseConfig):
                 optional_params["json_mode"] = True
             if param == "user":
                 optional_params["metadata"] = {"user_id": value}
-
+            if param == "thinking":
+                optional_params["thinking"] = value
         return optional_params
 
     def _create_json_tool_call_for_response_format(
diff --git a/litellm/llms/openai/chat/o_series_transformation.py b/litellm/llms/openai/chat/o_series_transformation.py
index 9e68fca46f..b74c7440b5 100644
--- a/litellm/llms/openai/chat/o_series_transformation.py
+++ b/litellm/llms/openai/chat/o_series_transformation.py
@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
 from litellm.utils import (
     supports_function_calling,
+    supports_parallel_function_calling,
     supports_response_schema,
     supports_system_messages,
 )
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
             model, custom_llm_provider
         )
         _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+        _supports_parallel_tool_calls = supports_parallel_function_calling(
+            model, custom_llm_provider
+        )
 
         if not _supports_function_calling:
             non_supported_params.append("tools")
             non_supported_params.append("tool_choice")
-            non_supported_params.append("parallel_tool_calls")
             non_supported_params.append("function_call")
             non_supported_params.append("functions")
 
+        if not _supports_parallel_tool_calls:
+            non_supported_params.append("parallel_tool_calls")
+
         if not _supports_response_schema:
             non_supported_params.append("response_format")
 
diff --git a/litellm/main.py b/litellm/main.py
index c52bfd7c92..b19c4b31d5 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
 from .llms.vllm.completion import handler as vllm_handler
 from .llms.watsonx.chat.handler import WatsonXChatHandler
 from .llms.watsonx.common_utils import IBMWatsonXMixin
+from .types.llms.anthropic import AnthropicThinkingParam
 from .types.llms.openai import (
     ChatCompletionAssistantMessage,
     ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -800,6 +802,7 @@ def completion(  # type: ignore # noqa: PLR0915
     api_key: Optional[str] = None,
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -1106,6 +1109,7 @@ def completion(  # type: ignore # noqa: PLR0915
             parallel_tool_calls=parallel_tool_calls,
             messages=messages,
             reasoning_effort=reasoning_effort,
+            thinking=thinking,
             **non_default_params,
         )
 
diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
index 34eea3b0fe..367b2421fd 100644
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@@ -359,3 +359,8 @@ ANTHROPIC_API_HEADERS = {
 ANTHROPIC_API_ONLY_HEADERS = {  # fails if calling anthropic on vertex ai / bedrock
     "anthropic-beta",
 }
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+    type: Literal["enabled"]
+    budget_tokens: int
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 2e0673c947..a50d583987 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -383,6 +383,7 @@ class ChatCompletionImageObject(TypedDict):
     type: Literal["image_url"]
     image_url: Union[str, ChatCompletionImageUrlObject]
 
+
 class ChatCompletionVideoUrlObject(TypedDict, total=False):
     url: Required[str]
     detail: str
diff --git a/litellm/utils.py b/litellm/utils.py
index 8f19e74509..601594beda 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -119,7 +119,10 @@ from litellm.router_utils.get_retry_from_policy import (
     reset_retry_policy,
 )
 from litellm.secret_managers.main import get_secret
-from litellm.types.llms.anthropic import ANTHROPIC_API_ONLY_HEADERS
+from litellm.types.llms.anthropic import (
+    ANTHROPIC_API_ONLY_HEADERS,
+    AnthropicThinkingParam,
+)
 from litellm.types.llms.openai import (
     AllMessageValues,
     AllPromptValues,
@@ -1969,6 +1972,19 @@ def supports_response_schema(
     )
 
 
+def supports_parallel_function_calling(
+    model: str, custom_llm_provider: Optional[str] = None
+) -> bool:
+    """
+    Check if the given model supports parallel tool calls and return a boolean value.
+    """
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_parallel_function_calling",
+    )
+
+
 def supports_function_calling(
     model: str, custom_llm_provider: Optional[str] = None
 ) -> bool:
@@ -2118,30 +2134,6 @@ def supports_embedding_image_input(
     )
 
 
-def supports_parallel_function_calling(model: str):
-    """
-    Check if the given model supports parallel function calling and return True if it does, False otherwise.
-
-    Parameters:
-    model (str): The model to check for support of parallel function calling.
-
-    Returns:
-    bool: True if the model supports parallel function calling, False otherwise.
-
-    Raises:
-    Exception: If the model is not found in the model_cost dictionary.
-    """
-    if model in litellm.model_cost:
-        model_info = litellm.model_cost[model]
-        if model_info.get("supports_parallel_function_calling", False) is True:
-            return True
-        return False
-    else:
-        raise Exception(
-            f"Model not supports parallel function calling. You passed model={model}."
-        )
-
-
 ####### HELPER FUNCTIONS ################
 def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
     for k, v in new_dict.items():
@@ -2752,6 +2744,7 @@ def get_optional_params(  # noqa: PLR0915
     reasoning_effort=None,
     additional_drop_params=None,
     messages: Optional[List[AllMessageValues]] = None,
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ):
     # retrieve all parameters passed to the function
@@ -2836,9 +2829,11 @@ def get_optional_params(  # noqa: PLR0915
         "additional_drop_params": None,
         "messages": None,
         "reasoning_effort": None,
+        "thinking": None,
     }
 
     # filter out those parameters that were passed with non-default values
+
     non_default_params = {
         k: v
         for k, v in passed_params.items()
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index 44c305b02e..48e38effd9 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -1166,6 +1166,8 @@ def test_anthropic_citations_api_streaming():
 def test_anthropic_thinking_output():
     from litellm import completion
 
+    litellm._turn_on_debug()
+
     resp = completion(
         model="anthropic/claude-3-7-sonnet-20250219",
         messages=[{"role": "user", "content": "What is the capital of France?"}],
diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py
index be3fbfde42..2071c215de 100644
--- a/tests/llm_translation/test_openai.py
+++ b/tests/llm_translation/test_openai.py
@@ -338,3 +338,18 @@ def test_openai_max_retries_0(mock_get_openai_client):
 
     mock_get_openai_client.assert_called_once()
     assert mock_get_openai_client.call_args.kwargs["max_retries"] == 0
+
+
+@pytest.mark.parametrize("model", ["o1", "o1-preview", "o1-mini", "o3-mini"])
+def test_o1_parallel_tool_calls(model):
+    litellm.completion(
+        model=model,
+        messages=[
+            {
+                "role": "user",
+                "content": "foo",
+            }
+        ],
+        parallel_tool_calls=True,
+        drop_params=True,
+    )
diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py
index 01c751e146..09071debc8 100644
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@@ -1069,7 +1069,6 @@ def test_gemini_frequency_penalty():
     assert optional_params["frequency_penalty"] == 0.5
 
 
-
 def test_azure_prediction_param():
     optional_params = get_optional_params(
         model="chatgpt-v2",
@@ -1084,6 +1083,7 @@ def test_azure_prediction_param():
             "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
         }
 
+
 def test_vertex_ai_ft_llama():
     optional_params = get_optional_params(
         model="1984786713414729728",
@@ -1093,3 +1093,24 @@ def test_vertex_ai_ft_llama():
     )
     assert optional_params["frequency_penalty"] == 0.5
     assert "max_retries" not in optional_params
+
+
+@pytest.mark.parametrize(
+    "model, expected_thinking",
+    [
+        ("claude-3-5-sonnet", False),
+        ("claude-3-7-sonnet", True),
+        ("gpt-3.5-turbo", False),
+    ],
+)
+def test_anthropic_thinking_param(model, expected_thinking):
+    optional_params = get_optional_params(
+        model=model,
+        custom_llm_provider="anthropic",
+        thinking={"type": "enabled", "budget_tokens": 1024},
+        drop_params=True,
+    )
+    if expected_thinking:
+        assert "thinking" in optional_params
+    else:
+        assert "thinking" not in optional_params
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index f3780db129..61b255b1d0 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -4072,7 +4072,7 @@ def test_mock_response_iterator_tool_use():
         "anthropic/claude-3-7-sonnet-20250219",
     ],
 )
-def test_deepseek_reasoning_content_completion(model):
+def test_reasoning_content_completion(model):
     # litellm.set_verbose = True
     try:
         # litellm._turn_on_debug()
@@ -4081,7 +4081,6 @@ def test_reasoning_content_completion(model):
             messages=[{"role": "user", "content": "Tell me a joke."}],
             stream=True,
             thinking={"type": "enabled", "budget_tokens": 1024},
-            timeout=5,
         )
 
         reasoning_content_exists = False
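
For reviewers, a minimal usage sketch of the new `thinking` parameter this diff threads through `completion()` / `acompletion()` and `get_optional_params()`. The model name and budget values are copied from the tests above; the live call assumes `ANTHROPIC_API_KEY` is set in the environment:

```python
from litellm import completion
from litellm.utils import get_optional_params

# Forwarded verbatim to Anthropic for claude-3-7-sonnet models, since
# AnthropicConfig.get_supported_openai_params() now adds "thinking" for them.
resp = completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

# For models that don't advertise support, the param is dropped rather than
# sent upstream when drop_params=True (see test_anthropic_thinking_param).
params = get_optional_params(
    model="claude-3-5-sonnet",
    custom_llm_provider="anthropic",
    thinking={"type": "enabled", "budget_tokens": 1024},
    drop_params=True,
)
assert "thinking" not in params
```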
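Likewise, a quick sketch of the relocated `supports_parallel_function_calling` helper: it now goes through `_supports_factory`, gains an optional `custom_llm_provider` argument, and returns `False` for unknown models instead of raising, which is what lets the o-series config decide per model whether to drop `parallel_tool_calls`. The boolean outcomes in the comments are expectations based on the model-info map, not guarantees:

```python
from litellm.utils import supports_parallel_function_calling

# Returns a plain bool driven by the "supports_parallel_function_calling"
# key in litellm's model-info map (no exception for unlisted models).
supports_parallel_function_calling("gpt-4o", custom_llm_provider="openai")   # expected True
supports_parallel_function_calling("o1-mini", custom_llm_provider="openai")  # expected False -> param dropped
```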