[Bug Fix] caching does not account for thinking or reasoning_effort config (#10140)

* _get_litellm_supported_chat_completion_kwargs
* test caching with thinking
2025-04-25 02:34:29 +00:00 · 2025-04-21 22:39:40 -07:00 · 2025-04-21 22:39:40 -07:00 · 7cb95bcc96
commit 7cb95bcc96
parent 104e4cb1bc
2 changed files with 75 additions and 3 deletions
--- a/litellm/litellm_core_utils/model_param_helper.py
+++ b/litellm/litellm_core_utils/model_param_helper.py
@@ -75,6 +75,10 @@ class ModelParamHelper:
        combined_kwargs = combined_kwargs.difference(exclude_kwargs)
        return combined_kwargs

+    @staticmethod
+    def get_litellm_provider_specific_params_for_chat_params() -> Set[str]:
+        return set(["thinking"])  # unioned into the supported chat-completion kwargs set
+
    @staticmethod
    def _get_litellm_supported_chat_completion_kwargs() -> Set[str]:
        """
@@ -82,11 +86,18 @@ class ModelParamHelper:

        This follows the OpenAI API Spec
        """
-        all_chat_completion_kwargs = set(
+        non_streaming_params: Set[str] = set(
            getattr(CompletionCreateParamsNonStreaming, "__annotations__", {}).keys()
-        ).union(
-            set(getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys())
        )
+        streaming_params: Set[str] = set(
+            getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys()
+        )
+        litellm_provider_specific_params: Set[str] = (
+            ModelParamHelper.get_litellm_provider_specific_params_for_chat_params()
+        )
+        all_chat_completion_kwargs: Set[str] = non_streaming_params.union(
+            streaming_params
+        ).union(litellm_provider_specific_params)
        return all_chat_completion_kwargs

    @staticmethod