Mirror of https://github.com/BerriAI/litellm.git
fix(o_series_transformation.py): fix optional param check for o-series models (#8787)
* fix(o_series_transformation.py): fix optional param check for o-series models; o3-mini and o1 do not support parallel tool calling
* fix(utils.py): support 'drop_params' for the 'thinking' param across models; allows switching to older Claude versions (or non-Anthropic models) with the param safely dropped
* fix: fix passing the thinking param in optional params; allows dropping the thinking param where it is not applicable
* test: update old model
* fix(utils.py): fix linting errors
* fix(main.py): add the thinking param to acompletion
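A minimal sketch of the drop_params behavior this commit enables; it assumes a configured Anthropic API key, and the model name and prompt are illustrative rather than taken from the diff:

    import litellm

    # "thinking" is only supported on claude-3-7-sonnet. With drop_params=True,
    # the param is dropped for other models instead of raising an error.
    resp = litellm.completion(
        model="anthropic/claude-3-5-sonnet-20240620",  # illustrative older model
        messages=[{"role": "user", "content": "Hello"}],
        thinking={"type": "enabled", "budget_tokens": 1024},
        drop_params=True,
    )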
parent aabb5c0df4
commit 017c482d7b
11 changed files with 87 additions and 31 deletions
@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
     "top_logprobs",
     "reasoning_effort",
     "extra_headers",
+    "thinking",
 ]

 openai_compatible_endpoints: List = [
@@ -80,7 +80,7 @@ class AnthropicConfig(BaseConfig):
         return super().get_config()

     def get_supported_openai_params(self, model: str):
-        return [
+        params = [
             "stream",
             "stop",
             "temperature",
@@ -95,6 +95,11 @@ class AnthropicConfig(BaseConfig):
             "user",
         ]

+        if "claude-3-7-sonnet" in model:
+            params.append("thinking")
+
+        return params
+
     def get_json_schema_from_pydantic_object(
         self, response_format: Union[Any, Dict, None]
     ) -> Optional[dict]:
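For illustration, a sketch of how the supported-params list now varies by model; it assumes AnthropicConfig is importable from the package root, as litellm exports it:

    from litellm import AnthropicConfig

    config = AnthropicConfig()
    # "thinking" is appended only when the model name contains "claude-3-7-sonnet"
    assert "thinking" in config.get_supported_openai_params(model="claude-3-7-sonnet-20250219")
    assert "thinking" not in config.get_supported_openai_params(model="claude-3-5-sonnet-20240620")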
@@ -302,6 +307,7 @@ class AnthropicConfig(BaseConfig):
         model: str,
         drop_params: bool,
     ) -> dict:
+
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
@@ -358,7 +364,8 @@ class AnthropicConfig(BaseConfig):
                 optional_params["json_mode"] = True
             if param == "user":
                 optional_params["metadata"] = {"user_id": value}
-
+            if param == "thinking":
+                optional_params["thinking"] = value
         return optional_params

     def _create_json_tool_call_for_response_format(
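A hypothetical direct call showing the new pass-through. In practice completion() invokes this mapping internally; the non_default_params and optional_params arguments assumed here follow BaseConfig's signature:

    from litellm import AnthropicConfig

    optional_params = AnthropicConfig().map_openai_params(
        non_default_params={"thinking": {"type": "enabled", "budget_tokens": 1024}},
        optional_params={},
        model="claude-3-7-sonnet-20250219",
        drop_params=False,
    )
    # optional_params now carries the "thinking" entry unchanged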
@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
 from litellm.utils import (
     supports_function_calling,
+    supports_parallel_function_calling,
     supports_response_schema,
     supports_system_messages,
 )
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
             model, custom_llm_provider
         )
         _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+        _supports_parallel_tool_calls = supports_parallel_function_calling(
+            model, custom_llm_provider
+        )

         if not _supports_function_calling:
             non_supported_params.append("tools")
             non_supported_params.append("tool_choice")
-            non_supported_params.append("parallel_tool_calls")
             non_supported_params.append("function_call")
             non_supported_params.append("functions")

+        if not _supports_parallel_tool_calls:
+            non_supported_params.append("parallel_tool_calls")
+
         if not _supports_response_schema:
             non_supported_params.append("response_format")

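The completion-level effect for o-series models, mirroring the test_o1_parallel_tool_calls test added later in this commit (a valid OpenAI API key is assumed):

    import litellm

    # parallel_tool_calls is now flagged as unsupported only when the capability
    # map says the model lacks it; drop_params=True then removes it cleanly.
    litellm.completion(
        model="o3-mini",
        messages=[{"role": "user", "content": "foo"}],
        parallel_tool_calls=True,
        drop_params=True,
    )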
@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
 from .llms.vllm.completion import handler as vllm_handler
 from .llms.watsonx.chat.handler import WatsonXChatHandler
 from .llms.watsonx.common_utils import IBMWatsonXMixin
+from .types.llms.anthropic import AnthropicThinkingParam
 from .types.llms.openai import (
     ChatCompletionAssistantMessage,
     ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -800,6 +802,7 @@ def completion(  # type: ignore # noqa: PLR0915
     api_key: Optional[str] = None,
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -1106,6 +1109,7 @@ def completion(  # type: ignore # noqa: PLR0915
             parallel_tool_calls=parallel_tool_calls,
             messages=messages,
             reasoning_effort=reasoning_effort,
+            thinking=thinking,
             **non_default_params,
         )

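A sketch of the new acompletion parameter in use, assuming claude-3-7-sonnet is reachable with a valid key:

    import asyncio

    import litellm

    async def main():
        resp = await litellm.acompletion(
            model="anthropic/claude-3-7-sonnet-20250219",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            thinking={"type": "enabled", "budget_tokens": 1024},
        )
        print(resp.choices[0].message)

    asyncio.run(main())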
@@ -359,3 +359,8 @@ ANTHROPIC_API_HEADERS = {
 ANTHROPIC_API_ONLY_HEADERS = {  # fails if calling anthropic on vertex ai / bedrock
     "anthropic-beta",
 }
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+    type: Literal["enabled"]
+    budget_tokens: int
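Because the TypedDict is declared with total=False, both keys are optional when constructing the param:

    from litellm.types.llms.anthropic import AnthropicThinkingParam

    thinking: AnthropicThinkingParam = {"type": "enabled", "budget_tokens": 1024}
    partial: AnthropicThinkingParam = {"type": "enabled"}  # also valid under total=False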
@@ -383,6 +383,7 @@ class ChatCompletionImageObject(TypedDict):
     type: Literal["image_url"]
     image_url: Union[str, ChatCompletionImageUrlObject]

+
 class ChatCompletionVideoUrlObject(TypedDict, total=False):
     url: Required[str]
     detail: str
@@ -119,7 +119,10 @@ from litellm.router_utils.get_retry_from_policy import (
     reset_retry_policy,
 )
 from litellm.secret_managers.main import get_secret
-from litellm.types.llms.anthropic import ANTHROPIC_API_ONLY_HEADERS
+from litellm.types.llms.anthropic import (
+    ANTHROPIC_API_ONLY_HEADERS,
+    AnthropicThinkingParam,
+)
 from litellm.types.llms.openai import (
     AllMessageValues,
     AllPromptValues,
@@ -1969,6 +1972,19 @@ def supports_response_schema(
     )


+def supports_parallel_function_calling(
+    model: str, custom_llm_provider: Optional[str] = None
+) -> bool:
+    """
+    Check if the given model supports parallel tool calls and return a boolean value.
+    """
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_parallel_function_calling",
+    )
+
+
 def supports_function_calling(
     model: str, custom_llm_provider: Optional[str] = None
 ) -> bool:
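Usage sketch for the rewritten helper; the expected return values assume the usual entries in litellm's model capability map:

    from litellm.utils import supports_parallel_function_calling

    supports_parallel_function_calling("o3-mini")           # expected: False
    supports_parallel_function_calling("gpt-4o", "openai")  # expected: True

Unlike the dict-lookup implementation removed below, this version accepts an optional custom_llm_provider and goes through the shared _supports_factory capability check.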
@@ -2118,30 +2134,6 @@ def supports_embedding_image_input(
     )


-def supports_parallel_function_calling(model: str):
-    """
-    Check if the given model supports parallel function calling and return True if it does, False otherwise.
-
-    Parameters:
-    model (str): The model to check for support of parallel function calling.
-
-    Returns:
-    bool: True if the model supports parallel function calling, False otherwise.
-
-    Raises:
-    Exception: If the model is not found in the model_cost dictionary.
-    """
-    if model in litellm.model_cost:
-        model_info = litellm.model_cost[model]
-        if model_info.get("supports_parallel_function_calling", False) is True:
-            return True
-        return False
-    else:
-        raise Exception(
-            f"Model not supports parallel function calling. You passed model={model}."
-        )
-
-
 ####### HELPER FUNCTIONS ################
 def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
     for k, v in new_dict.items():
@@ -2752,6 +2744,7 @@ def get_optional_params(  # noqa: PLR0915
     reasoning_effort=None,
     additional_drop_params=None,
     messages: Optional[List[AllMessageValues]] = None,
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ):
     # retrieve all parameters passed to the function
@@ -2836,9 +2829,11 @@ def get_optional_params(  # noqa: PLR0915
         "additional_drop_params": None,
         "messages": None,
         "reasoning_effort": None,
+        "thinking": None,
     }

     # filter out those parameters that were passed with non-default values
+
     non_default_params = {
         k: v
         for k, v in passed_params.items()
@@ -1166,6 +1166,8 @@ def test_anthropic_citations_api_streaming():
 def test_anthropic_thinking_output():
     from litellm import completion

+    litellm._turn_on_debug()
+
     resp = completion(
         model="anthropic/claude-3-7-sonnet-20250219",
         messages=[{"role": "user", "content": "What is the capital of France?"}],
@@ -338,3 +338,18 @@ def test_openai_max_retries_0(mock_get_openai_client):

     mock_get_openai_client.assert_called_once()
     assert mock_get_openai_client.call_args.kwargs["max_retries"] == 0
+
+
+@pytest.mark.parametrize("model", ["o1", "o1-preview", "o1-mini", "o3-mini"])
+def test_o1_parallel_tool_calls(model):
+    litellm.completion(
+        model=model,
+        messages=[
+            {
+                "role": "user",
+                "content": "foo",
+            }
+        ],
+        parallel_tool_calls=True,
+        drop_params=True,
+    )
@@ -1069,7 +1069,6 @@ def test_gemini_frequency_penalty():
     assert optional_params["frequency_penalty"] == 0.5


-
 def test_azure_prediction_param():
     optional_params = get_optional_params(
         model="chatgpt-v2",
@@ -1084,6 +1083,7 @@ def test_azure_prediction_param():
                 "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
             }
+

 def test_vertex_ai_ft_llama():
     optional_params = get_optional_params(
         model="1984786713414729728",
@@ -1093,3 +1093,24 @@ def test_vertex_ai_ft_llama():
     )
     assert optional_params["frequency_penalty"] == 0.5
     assert "max_retries" not in optional_params
+
+
+@pytest.mark.parametrize(
+    "model, expected_thinking",
+    [
+        ("claude-3-5-sonnet", False),
+        ("claude-3-7-sonnet", True),
+        ("gpt-3.5-turbo", False),
+    ],
+)
+def test_anthropic_thinking_param(model, expected_thinking):
+    optional_params = get_optional_params(
+        model=model,
+        custom_llm_provider="anthropic",
+        thinking={"type": "enabled", "budget_tokens": 1024},
+        drop_params=True,
+    )
+    if expected_thinking:
+        assert "thinking" in optional_params
+    else:
+        assert "thinking" not in optional_params
@@ -4072,7 +4072,7 @@ def test_mock_response_iterator_tool_use():
         "anthropic/claude-3-7-sonnet-20250219",
     ],
 )
-def test_deepseek_reasoning_content_completion(model):
+def test_reasoning_content_completion(model):
     # litellm.set_verbose = True
     try:
         # litellm._turn_on_debug()
@@ -4081,7 +4081,6 @@ def test_deepseek_reasoning_content_completion(model):
             messages=[{"role": "user", "content": "Tell me a joke."}],
             stream=True,
             thinking={"type": "enabled", "budget_tokens": 1024},
-            timeout=5,
         )

         reasoning_content_exists = False