diff --git a/docs/my-website/docs/reasoning_content.md b/docs/my-website/docs/reasoning_content.md
index 45f7aad0f1..b384eb92ac 100644
--- a/docs/my-website/docs/reasoning_content.md
+++ b/docs/my-website/docs/reasoning_content.md
@@ -18,7 +18,7 @@ Supported Providers:
 
 LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.
 
-```python
+```python title="Example response from litellm"
 "message": {
     ...
     "reasoning_content": "The capital of France is Paris.",
@@ -37,7 +37,7 @@ LiteLLM will standardize the `reasoning_content` in the response and `thinking_b
 
 
 
-```python
+```python showLineNumbers
 from litellm import completion
 import os
 
@@ -111,7 +111,7 @@
 Here's how to use `thinking` blocks by Anthropic with tool calling.
 
 
-```python
+```python showLineNumbers
 litellm._turn_on_debug()
 litellm.modify_params = True
 model = "anthropic/claude-3-7-sonnet-20250219" # works across Anthropic, Bedrock, Vertex AI
@@ -210,7 +210,7 @@ if tool_calls:
 
 1. Setup config.yaml
 
-```yaml
+```yaml showLineNumbers
 model_list:
   - model_name: claude-3-7-sonnet-thinking
     litellm_params:
@@ -224,7 +224,7 @@ model_list:
 
 2. Run proxy
 
-```bash
+```bash showLineNumbers
 litellm --config config.yaml
 
 # RUNNING on http://0.0.0.0:4000
@@ -332,7 +332,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
 
 Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927).
 
-```python
+```python showLineNumbers
 litellm.drop_params = True # 👈 EITHER GLOBALLY or per request
 
 # or per request
@@ -373,7 +373,7 @@ You can also pass the `thinking` parameter to Anthropic models.
 
 
 
-```python
+```python showLineNumbers
 response = litellm.completion(
   model="anthropic/claude-3-7-sonnet-20250219",
   messages=[{"role": "user", "content": "What is the capital of France?"}],
@@ -395,5 +395,92 @@ curl http://0.0.0.0:4000/v1/chat/completions \
   }'
 ```
 
+
+
+## Checking if a model supports reasoning
+
+Use `litellm.supports_reasoning(model="")` -> returns `True` if the model supports reasoning, `False` otherwise.
+
+```python showLineNumbers title="litellm.supports_reasoning() usage"
+import litellm
+
+# Example models that support reasoning
+assert litellm.supports_reasoning(model="anthropic/claude-3-7-sonnet-20250219") == True
+assert litellm.supports_reasoning(model="deepseek/deepseek-reasoner") == True
+
+# Example models that do not support reasoning
+assert litellm.supports_reasoning(model="openai/gpt-3.5-turbo") == False
+```
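+
+You can use this check to decide whether to send reasoning-specific parameters at all. A minimal sketch (the model choice and the `reasoning_effort` value are illustrative, not prescriptive):
+
+```python showLineNumbers title="gating reasoning params on supports_reasoning"
+import litellm
+
+def ask(model: str, prompt: str):
+    kwargs = {}
+    # Only forward reasoning params to models that support them
+    if litellm.supports_reasoning(model=model):
+        kwargs["reasoning_effort"] = "low"
+    return litellm.completion(
+        model=model,
+        messages=[{"role": "user", "content": prompt}],
+        **kwargs,
+    )
+
+response = ask("openai/o3-mini", "What is the capital of France?")
+print(response.choices[0].message.content)
+```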
+
+
+1. Define models that support reasoning in your `config.yaml`. You can optionally add `supports_reasoning: True` to the `model_info` if LiteLLM does not automatically detect it for your custom model.
+
+```yaml showLineNumbers title="litellm proxy config.yaml"
+model_list:
+  - model_name: claude-3-sonnet-reasoning
+    litellm_params:
+      model: anthropic/claude-3-7-sonnet-20250219
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: deepseek-reasoning
+    litellm_params:
+      model: deepseek/deepseek-reasoner
+      api_key: os.environ/DEEPSEEK_API_KEY
+  # Example for a custom model where detection might be needed
+  - model_name: my-custom-reasoning-model
+    litellm_params:
+      model: openai/my-custom-model # Assuming it's OpenAI compatible
+      api_base: http://localhost:8000
+      api_key: fake-key
+    model_info:
+      supports_reasoning: True # Explicitly mark as supporting reasoning
+```
+
+2. Run the proxy server:
+
+```bash showLineNumbers title="litellm --config config.yaml"
+litellm --config config.yaml
+```
+
+3. Call `/model_group/info` to check if your model supports reasoning.
+
+```shell showLineNumbers title="curl /model_group/info"
+curl -X 'GET' \
+  'http://localhost:4000/model_group/info' \
+  -H 'accept: application/json' \
+  -H 'x-api-key: sk-1234'
+```
+
+Expected Response
+
+```json showLineNumbers title="response from /model_group/info"
+{
+  "data": [
+    {
+      "model_group": "claude-3-sonnet-reasoning",
+      "providers": ["anthropic"],
+      "mode": "chat",
+      "supports_reasoning": true
+    },
+    {
+      "model_group": "deepseek-reasoning",
+      "providers": ["deepseek"],
+      "supports_reasoning": true
+    },
+    {
+      "model_group": "my-custom-reasoning-model",
+      "providers": ["openai"],
+      "supports_reasoning": true
+    }
+  ]
+}
+```
+
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 35051c65a5..f27fa98029 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -772,6 +772,7 @@ from .utils import (
     supports_audio_input,
     supports_audio_output,
     supports_system_messages,
+    supports_reasoning,
     get_litellm_params,
     acreate,
     get_max_tokens,
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 08e9a7791b..0696b5e18a 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -15,6 +15,7 @@
         "supports_prompt_caching": true,
         "supports_response_schema": true,
         "supports_system_messages": true,
+        "supports_reasoning": true,
         "supports_web_search": true,
         "search_context_cost_per_query": {
             "search_context_size_low": 0.0000,
@@ -379,6 +380,7 @@
         "supports_response_schema": true,
         "supports_tool_choice": true,
         "supports_native_streaming": false,
+        "supports_reasoning": true,
         "supported_modalities": ["text", "image"],
         "supported_output_modalities": ["text"],
         "supported_endpoints": ["/v1/responses", "/v1/batch"]
@@ -401,6 +403,7 @@
         "supports_response_schema": true,
         "supports_tool_choice": true,
         "supports_native_streaming": false,
+        "supports_reasoning": true,
         "supported_modalities": ["text", "image"],
         "supported_output_modalities": ["text"],
         "supported_endpoints": ["/v1/responses", "/v1/batch"]
@@ -420,6 +423,7 @@
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true,
+        "supports_reasoning": true,
         "supports_tool_choice": true
     },
     "o1-mini": {
@@ -448,6 +452,7 @@
         "supports_vision": false,
         "supports_prompt_caching": true,
         "supports_response_schema": true,
+        "supports_reasoning": true,
         "supports_tool_choice": true
     },
     "o3-mini-2025-01-31": {
@@ -464,6 +469,7 @@
         "supports_vision": false,
         "supports_prompt_caching": true,
         "supports_response_schema": true,
+        "supports_reasoning": true,
         "supports_tool_choice": true
     },
     "o1-mini-2024-09-12": {
@@ -476,6 +482,7 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
+        "supports_reasoning": true,
         "supports_prompt_caching": true
     },
     "o1-preview": {
@@ -488,6 +495,7 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
+        "supports_reasoning": true,
         "supports_prompt_caching": true
     },
     "o1-preview-2024-09-12": {
@@ -500,6 +508,7 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
+        "supports_reasoning": true,
         "supports_prompt_caching": true
     },
     "o1-2024-12-17": {
@@ -517,6 +526,7 @@
         "supports_prompt_caching": true,
         "supports_system_messages": true,
         "supports_response_schema": true,
+        "supports_reasoning": true,
"supports_tool_choice": true }, "chatgpt-4o-latest": { @@ -1416,6 +1426,7 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "azure", "mode": "chat", + "supports_reasoning": true, "supports_vision": false, "supports_prompt_caching": true, "supports_tool_choice": true @@ -1432,6 +1443,7 @@ "litellm_provider": "azure", "mode": "chat", "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1447,6 +1459,7 @@ "litellm_provider": "azure", "mode": "chat", "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1477,6 +1490,7 @@ "mode": "chat", "supports_vision": false, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true }, @@ -1492,6 +1506,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/o1-mini-2024-09-12": { @@ -1506,6 +1521,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/us/o1-mini-2024-09-12": { @@ -1552,6 +1568,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1567,6 +1584,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1612,6 +1630,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/o1-preview-2024-09-12": { @@ -1626,6 +1645,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/us/o1-preview-2024-09-12": { @@ -2284,6 +2304,7 @@ "litellm_provider": "azure_ai", "mode": "chat", "supports_tool_choice": true, + "supports_reasoning": true, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367" }, "azure_ai/deepseek-v3": { @@ -2984,6 +3005,7 @@ "supports_function_calling": true, "supports_assistant_prefill": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_prompt_caching": true }, "deepseek/deepseek-chat": { @@ -3146,6 +3168,7 @@ "mode": "chat", "supports_function_calling": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_response_schema": false, "source": "https://x.ai/api#pricing" }, @@ -3159,6 +3182,7 @@ "mode": "chat", "supports_function_calling": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_response_schema": false, "source": "https://x.ai/api#pricing" }, @@ -3170,6 +3194,7 @@ "output_cost_per_token": 0.000004, "litellm_provider": "xai", "mode": "chat", + "supports_reasoning": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_response_schema": false, @@ -3245,6 +3270,7 @@ "mode": "chat", "supports_system_messages": false, "supports_function_calling": false, + "supports_reasoning": true, "supports_response_schema": false, 
"supports_tool_choice": true }, @@ -3756,7 +3782,8 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "claude-3-7-sonnet-20250219": { "max_tokens": 128000, @@ -3776,7 +3803,8 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2026-02-01", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "claude-3-5-sonnet-20241022": { "max_tokens": 8192, @@ -5354,6 +5382,7 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", + "supports_reasoning": true, "supports_tool_choice": true }, "vertex_ai/claude-3-haiku": { @@ -6671,6 +6700,7 @@ "mode": "chat", "supports_function_calling": true, "supports_assistant_prefill": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_prompt_caching": true }, @@ -6846,6 +6876,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -6861,6 +6892,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "tool_use_system_prompt_tokens": 159, "supports_tool_choice": true }, @@ -7028,6 +7060,7 @@ "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, + "supports_reasoning": true, "supports_parallel_function_calling": true, "supports_vision": false, "supports_tool_choice": true @@ -7041,6 +7074,7 @@ "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, + "supports_reasoning": true, "supports_parallel_function_calling": true, "supports_vision": false, "supports_tool_choice": true @@ -7858,6 +7892,7 @@ "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -7975,7 +8010,8 @@ "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "us.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -8734,6 +8770,7 @@ "output_cost_per_token": 0.0000054, "litellm_provider": "bedrock_converse", "mode": "chat", + "supports_reasoning": true, "supports_function_calling": false, "supports_tool_choice": false @@ -10560,7 +10597,8 @@ "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}, "supports_assistant_prefill": true, "supports_function_calling": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "databricks/databricks-meta-llama-3-1-405b-instruct": { "max_tokens": 128000, @@ -10818,6 +10856,7 @@ "max_input_tokens": 32768, "max_output_tokens": 8192, "litellm_provider": "snowflake", + "supports_reasoning": true, "mode": "chat" }, "snowflake/snowflake-arctic": { diff --git a/litellm/router.py b/litellm/router.py index 39b3ddcc16..36bfca523c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -339,9 +339,9 @@ class Router: ) # names of models under litellm_params. ex. 
azure/chatgpt-v-2 self.deployment_latency_map = {} ### CACHING ### - cache_type: Literal[ - "local", "redis", "redis-semantic", "s3", "disk" - ] = "local" # default to an in-memory cache + cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = ( + "local" # default to an in-memory cache + ) redis_cache = None cache_config: Dict[str, Any] = {} @@ -562,9 +562,9 @@ class Router: ) ) - self.model_group_retry_policy: Optional[ - Dict[str, RetryPolicy] - ] = model_group_retry_policy + self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = ( + model_group_retry_policy + ) self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None if allowed_fails_policy is not None: @@ -619,7 +619,7 @@ class Router: @staticmethod def _create_redis_cache( - cache_config: Dict[str, Any] + cache_config: Dict[str, Any], ) -> Union[RedisCache, RedisClusterCache]: """ Initializes either a RedisCache or RedisClusterCache based on the cache_config. @@ -1099,9 +1099,9 @@ class Router: """ Adds default litellm params to kwargs, if set. """ - self.default_litellm_params[ - metadata_variable_name - ] = self.default_litellm_params.pop("metadata", {}) + self.default_litellm_params[metadata_variable_name] = ( + self.default_litellm_params.pop("metadata", {}) + ) for k, v in self.default_litellm_params.items(): if ( k not in kwargs and v is not None @@ -3217,11 +3217,11 @@ class Router: if isinstance(e, litellm.ContextWindowExceededError): if context_window_fallbacks is not None: - fallback_model_group: Optional[ - List[str] - ] = self._get_fallback_model_group_from_fallbacks( - fallbacks=context_window_fallbacks, - model_group=model_group, + fallback_model_group: Optional[List[str]] = ( + self._get_fallback_model_group_from_fallbacks( + fallbacks=context_window_fallbacks, + model_group=model_group, + ) ) if fallback_model_group is None: raise original_exception @@ -3253,11 +3253,11 @@ class Router: e.message += "\n{}".format(error_message) elif isinstance(e, litellm.ContentPolicyViolationError): if content_policy_fallbacks is not None: - fallback_model_group: Optional[ - List[str] - ] = self._get_fallback_model_group_from_fallbacks( - fallbacks=content_policy_fallbacks, - model_group=model_group, + fallback_model_group: Optional[List[str]] = ( + self._get_fallback_model_group_from_fallbacks( + fallbacks=content_policy_fallbacks, + model_group=model_group, + ) ) if fallback_model_group is None: raise original_exception @@ -5020,6 +5020,11 @@ class Router: and model_info["supports_web_search"] is True # type: ignore ): model_group_info.supports_web_search = True + if ( + model_info.get("supports_reasoning", None) is not None + and model_info["supports_reasoning"] is True # type: ignore + ): + model_group_info.supports_reasoning = True if ( model_info.get("supported_openai_params", None) is not None and model_info["supported_openai_params"] is not None diff --git a/litellm/types/router.py b/litellm/types/router.py index 745d7640e2..fa8273e7b2 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -96,16 +96,18 @@ class ModelInfo(BaseModel): id: Optional[ str ] # Allow id to be optional on input, but it will always be present as a str in the model instance - db_model: bool = False # used for proxy - to separate models which are stored in the db vs. config. + db_model: bool = ( + False # used for proxy - to separate models which are stored in the db vs. config. 
+ ) updated_at: Optional[datetime.datetime] = None updated_by: Optional[str] = None created_at: Optional[datetime.datetime] = None created_by: Optional[str] = None - base_model: Optional[ - str - ] = None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking + base_model: Optional[str] = ( + None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking + ) tier: Optional[Literal["free", "paid"]] = None """ @@ -178,12 +180,12 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams): custom_llm_provider: Optional[str] = None tpm: Optional[int] = None rpm: Optional[int] = None - timeout: Optional[ - Union[float, str, httpx.Timeout] - ] = None # if str, pass in as os.environ/ - stream_timeout: Optional[ - Union[float, str] - ] = None # timeout when making stream=True calls, if str, pass in as os.environ/ + timeout: Optional[Union[float, str, httpx.Timeout]] = ( + None # if str, pass in as os.environ/ + ) + stream_timeout: Optional[Union[float, str]] = ( + None # timeout when making stream=True calls, if str, pass in as os.environ/ + ) max_retries: Optional[int] = None organization: Optional[str] = None # for openai orgs configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None @@ -253,9 +255,9 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams): if max_retries is not None and isinstance(max_retries, str): max_retries = int(max_retries) # cast to int # We need to keep max_retries in args since it's a parameter of GenericLiteLLMParams - args[ - "max_retries" - ] = max_retries # Put max_retries back in args after popping it + args["max_retries"] = ( + max_retries # Put max_retries back in args after popping it + ) super().__init__(**args, **params) def __contains__(self, key): @@ -562,6 +564,7 @@ class ModelGroupInfo(BaseModel): supports_parallel_function_calling: bool = Field(default=False) supports_vision: bool = Field(default=False) supports_web_search: bool = Field(default=False) + supports_reasoning: bool = Field(default=False) supports_function_calling: bool = Field(default=False) supported_openai_params: Optional[List[str]] = Field(default=[]) configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 1bbec44b82..452e43c82b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -101,6 +101,7 @@ class ProviderSpecificModelInfo(TypedDict, total=False): supports_native_streaming: Optional[bool] supports_parallel_function_calling: Optional[bool] supports_web_search: Optional[bool] + supports_reasoning: Optional[bool] class SearchContextCostPerQuery(TypedDict, total=False): diff --git a/litellm/utils.py b/litellm/utils.py index 25d2f2105e..b31b929b27 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -484,7 +484,7 @@ def load_credentials_from_list(kwargs: dict): def get_dynamic_callbacks( - dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] + dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]], ) -> List: returned_callbacks = litellm.callbacks.copy() if dynamic_callbacks: @@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, CustomLogger]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = ( + 
kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -2229,6 +2229,15 @@ def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bo ) +def supports_reasoning(model: str, custom_llm_provider: Optional[str] = None) -> bool: + """ + Check if the given model supports reasoning and return a boolean value. + """ + return _supports_factory( + model=model, custom_llm_provider=custom_llm_provider, key="supports_reasoning" + ) + + def supports_embedding_image_input( model: str, custom_llm_provider: Optional[str] = None ) -> bool: @@ -3004,16 +3013,16 @@ def get_optional_params( # noqa: PLR0915 True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -3036,10 +3045,10 @@ def get_optional_params( # noqa: PLR0915 if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -4055,9 +4064,9 @@ def _count_characters(text: str) -> int: def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str: - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) response_str = "" for choice in _choices: @@ -4597,6 +4606,7 @@ def _get_model_info_helper( # noqa: PLR0915 "supports_native_streaming", None ), supports_web_search=_model_info.get("supports_web_search", False), + supports_reasoning=_model_info.get("supports_reasoning", False), search_context_cost_per_query=_model_info.get( "search_context_cost_per_query", None ), @@ -4669,6 +4679,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod supports_audio_output: Optional[bool] supports_pdf_input: Optional[bool] supports_web_search: Optional[bool] + supports_reasoning: Optional[bool] Raises: Exception: If the model is not mapped yet. 
@@ -6188,7 +6199,7 @@ def validate_chat_completion_user_messages(messages: List[AllMessageValues]): def validate_chat_completion_tool_choice( - tool_choice: Optional[Union[dict, str]] + tool_choice: Optional[Union[dict, str]], ) -> Optional[Union[dict, str]]: """ Confirm the tool choice is passed in the OpenAI format. diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 08e9a7791b..0696b5e18a 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -15,6 +15,7 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_system_messages": true, + "supports_reasoning": true, "supports_web_search": true, "search_context_cost_per_query": { "search_context_size_low": 0.0000, @@ -379,6 +380,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_native_streaming": false, + "supports_reasoning": true, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] @@ -401,6 +403,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_native_streaming": false, + "supports_reasoning": true, "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] @@ -420,6 +423,7 @@ "supports_prompt_caching": true, "supports_system_messages": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "o1-mini": { @@ -448,6 +452,7 @@ "supports_vision": false, "supports_prompt_caching": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "o3-mini-2025-01-31": { @@ -464,6 +469,7 @@ "supports_vision": false, "supports_prompt_caching": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "o1-mini-2024-09-12": { @@ -476,6 +482,7 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true }, "o1-preview": { @@ -488,6 +495,7 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true }, "o1-preview-2024-09-12": { @@ -500,6 +508,7 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true }, "o1-2024-12-17": { @@ -517,6 +526,7 @@ "supports_prompt_caching": true, "supports_system_messages": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "chatgpt-4o-latest": { @@ -1416,6 +1426,7 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "azure", "mode": "chat", + "supports_reasoning": true, "supports_vision": false, "supports_prompt_caching": true, "supports_tool_choice": true @@ -1432,6 +1443,7 @@ "litellm_provider": "azure", "mode": "chat", "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1447,6 +1459,7 @@ "litellm_provider": "azure", "mode": "chat", "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1477,6 +1490,7 @@ "mode": "chat", "supports_vision": false, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true }, @@ -1492,6 +1506,7 @@ "supports_function_calling": true, 
"supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/o1-mini-2024-09-12": { @@ -1506,6 +1521,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/us/o1-mini-2024-09-12": { @@ -1552,6 +1568,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1567,6 +1584,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "supports_prompt_caching": true, "supports_tool_choice": true }, @@ -1612,6 +1630,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/o1-preview-2024-09-12": { @@ -1626,6 +1645,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, + "supports_reasoning": true, "supports_prompt_caching": true }, "azure/us/o1-preview-2024-09-12": { @@ -2284,6 +2304,7 @@ "litellm_provider": "azure_ai", "mode": "chat", "supports_tool_choice": true, + "supports_reasoning": true, "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367" }, "azure_ai/deepseek-v3": { @@ -2984,6 +3005,7 @@ "supports_function_calling": true, "supports_assistant_prefill": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_prompt_caching": true }, "deepseek/deepseek-chat": { @@ -3146,6 +3168,7 @@ "mode": "chat", "supports_function_calling": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_response_schema": false, "source": "https://x.ai/api#pricing" }, @@ -3159,6 +3182,7 @@ "mode": "chat", "supports_function_calling": true, "supports_tool_choice": true, + "supports_reasoning": true, "supports_response_schema": false, "source": "https://x.ai/api#pricing" }, @@ -3170,6 +3194,7 @@ "output_cost_per_token": 0.000004, "litellm_provider": "xai", "mode": "chat", + "supports_reasoning": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_response_schema": false, @@ -3245,6 +3270,7 @@ "mode": "chat", "supports_system_messages": false, "supports_function_calling": false, + "supports_reasoning": true, "supports_response_schema": false, "supports_tool_choice": true }, @@ -3756,7 +3782,8 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "claude-3-7-sonnet-20250219": { "max_tokens": 128000, @@ -3776,7 +3803,8 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2026-02-01", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "claude-3-5-sonnet-20241022": { "max_tokens": 8192, @@ -5354,6 +5382,7 @@ "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", + "supports_reasoning": true, "supports_tool_choice": true }, "vertex_ai/claude-3-haiku": { @@ -6671,6 +6700,7 @@ "mode": "chat", "supports_function_calling": true, "supports_assistant_prefill": true, + "supports_reasoning": 
true, "supports_tool_choice": true, "supports_prompt_caching": true }, @@ -6846,6 +6876,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -6861,6 +6892,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_reasoning": true, "tool_use_system_prompt_tokens": 159, "supports_tool_choice": true }, @@ -7028,6 +7060,7 @@ "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, + "supports_reasoning": true, "supports_parallel_function_calling": true, "supports_vision": false, "supports_tool_choice": true @@ -7041,6 +7074,7 @@ "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, + "supports_reasoning": true, "supports_parallel_function_calling": true, "supports_vision": false, "supports_tool_choice": true @@ -7858,6 +7892,7 @@ "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, + "supports_reasoning": true, "supports_tool_choice": true }, "anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -7975,7 +8010,8 @@ "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "us.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -8734,6 +8770,7 @@ "output_cost_per_token": 0.0000054, "litellm_provider": "bedrock_converse", "mode": "chat", + "supports_reasoning": true, "supports_function_calling": false, "supports_tool_choice": false @@ -10560,7 +10597,8 @@ "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Claude 3.7 conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}, "supports_assistant_prefill": true, "supports_function_calling": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_reasoning": true }, "databricks/databricks-meta-llama-3-1-405b-instruct": { "max_tokens": 128000, @@ -10818,6 +10856,7 @@ "max_input_tokens": 32768, "max_output_tokens": 8192, "litellm_provider": "snowflake", + "supports_reasoning": true, "mode": "chat" }, "snowflake/snowflake-arctic": { diff --git a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py index f2a5d6d80d..080aa3bd16 100644 --- a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py +++ b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py @@ -457,7 +457,7 @@ class TestSpendLogsPayload: "model": "gpt-4o", "user": "", "team_id": "", - "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}', + "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": 
{"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}', "cache_key": "Cache OFF", "spend": 0.00022500000000000002, "total_tokens": 30, @@ -555,7 +555,7 @@ class TestSpendLogsPayload: "model": "claude-3-7-sonnet-20250219", "user": "", "team_id": "", - "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, 
"output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}', + "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "supports_reasoning": true, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}', "cache_key": "Cache OFF", "spend": 0.01383, "total_tokens": 2598, @@ -651,7 +651,7 @@ class 
TestSpendLogsPayload: "model": "claude-3-7-sonnet-20250219", "user": "", "team_id": "", - "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}', + "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, 
"output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "supports_reasoning": true, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}', "cache_key": "Cache OFF", "spend": 0.01383, "total_tokens": 2598, diff --git a/tests/litellm_utils_tests/test_utils.py b/tests/litellm_utils_tests/test_utils.py index 3088fa250f..0ffc6e1421 100644 --- a/tests/litellm_utils_tests/test_utils.py +++ b/tests/litellm_utils_tests/test_utils.py @@ -514,6 +514,26 @@ def test_supports_web_search(model, expected_bool): pytest.fail(f"Error occurred: {e}") +@pytest.mark.parametrize( + "model, expected_bool", + [ + ("openai/o3-mini", True), + ("o3-mini", True), + ("xai/grok-3-mini-beta", True), + ("xai/grok-3-mini-fast-beta", True), + ("xai/grok-2", False), + ("gpt-3.5-turbo", False), + ], +) +def test_supports_reasoning(model, expected_bool): + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + assert litellm.supports_reasoning(model=model) == expected_bool + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_get_max_token_unit_test(): """ More complete testing in `test_completion_cost.py` diff --git a/tests/llm_translation/test_xai.py b/tests/llm_translation/test_xai.py index 3846a4f1f0..afe4a3c0b9 100644 --- a/tests/llm_translation/test_xai.py +++ b/tests/llm_translation/test_xai.py @@ -160,3 +160,24 @@ def test_xai_message_name_filtering(): ) assert response is not None assert response.choices[0].message.content is not None + + +def test_xai_reasoning_effort(): + litellm._turn_on_debug() + messages = [ + { + "role": "system", + "content": "*I press the green button*", + "name": "example_user" + }, + {"role": "user", "content": "Hello", "name": "John"}, + {"role": "assistant", "content": "Hello", "name": "Jane"}, + ] + response = completion( + model="xai/grok-3", + messages=messages, + reasoning_effort="high", + stream=True, + ) + for chunk in response: + print(chunk) diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py index 15e80e831f..eccd34f7f9 100644 --- a/tests/local_testing/test_get_model_info.py +++ b/tests/local_testing/test_get_model_info.py @@ -510,6 +510,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "supports_video_input": {"type": "boolean"}, "supports_vision": {"type": "boolean"}, "supports_web_search": {"type": 
"boolean"}, + "supports_reasoning": {"type": "boolean"}, "tool_use_system_prompt_tokens": {"type": "number"}, "tpm": {"type": "number"}, "supported_endpoints": { diff --git a/tests/logging_callback_tests/gcs_pub_sub_body/spend_logs_payload.json b/tests/logging_callback_tests/gcs_pub_sub_body/spend_logs_payload.json index 3a00b107d2..1157d6919e 100644 --- a/tests/logging_callback_tests/gcs_pub_sub_body/spend_logs_payload.json +++ b/tests/logging_callback_tests/gcs_pub_sub_body/spend_logs_payload.json @@ -9,7 +9,7 @@ "model": "gpt-4o", "user": "", "team_id": "", - "metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, \"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_token_above_200k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_token_above_200k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}", + "metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, 
\"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_token_above_200k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_token_above_200k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"supports_reasoning\": false, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}", "cache_key": "Cache OFF", "spend": 0.00022500000000000002, "total_tokens": 30,