Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
fix(vertex_and_google_ai_studio_gemini.py): allow thinking budget = 0
Fixes https://github.com/BerriAI/litellm/issues/10121
parent 4a50cf10fb
commit e434ccc7e1
2 changed files with 20 additions and 2 deletions
@@ -390,7 +390,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         params: GeminiThinkingConfig = {}
         if thinking_enabled:
             params["includeThoughts"] = True
-        if thinking_budget:
+        if thinking_budget is not None and isinstance(thinking_budget, int):
             params["thinkingBudget"] = thinking_budget
 
         return params
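The one-line change above matters because 0 is falsy in Python: the old `if thinking_budget:` guard silently dropped an explicit budget of 0 instead of sending `thinkingBudget: 0` to Gemini. A minimal, self-contained sketch of the before/after behavior (simplified stand-ins, not the actual litellm code paths):

# Sketch only: simplified stand-ins for the real VertexGeminiConfig logic,
# showing how a truthiness check loses an explicit budget of 0.
from typing import Optional


def old_thinking_config(thinking_enabled: bool, thinking_budget: Optional[int]) -> dict:
    params: dict = {}
    if thinking_enabled:
        params["includeThoughts"] = True
    if thinking_budget:  # bug: 0 is falsy, so the key is silently omitted
        params["thinkingBudget"] = thinking_budget
    return params


def new_thinking_config(thinking_enabled: bool, thinking_budget: Optional[int]) -> dict:
    params: dict = {}
    if thinking_enabled:
        params["includeThoughts"] = True
    if thinking_budget is not None and isinstance(thinking_budget, int):
        params["thinkingBudget"] = thinking_budget  # 0 is now kept
    return params


print(old_thinking_config(True, 0))  # {'includeThoughts': True}  -> budget lost
print(new_thinking_config(True, 0))  # {'includeThoughts': True, 'thinkingBudget': 0}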
@@ -116,4 +116,22 @@ def test_gemini_thinking():
         messages=messages,  # make sure call works
     )
     print(response.choices[0].message)
-    assert response.choices[0].message.content is not None
+    assert response.choices[0].message.content is not None
+
+
+def test_gemini_thinking_budget_0():
+    litellm._turn_on_debug()
+    from litellm.types.utils import Message, CallTypes
+    from litellm.utils import return_raw_request
+    import json
+
+    raw_request = return_raw_request(
+        endpoint=CallTypes.completion,
+        kwargs={
+            "model": "gemini/gemini-2.5-flash-preview-04-17",
+            "messages": [{"role": "user", "content": "Explain the concept of Occam's Razor and provide a simple, everyday example"}],
+            "thinking": {"type": "enabled", "budget_tokens": 0}
+        }
+    )
+    print(raw_request)
+    assert "0" in json.dumps(raw_request["raw_request_body"])
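The new test uses `return_raw_request` to inspect the outgoing request body and asserts that the zero budget appears in the serialized payload. As an illustrative live-call counterpart (not part of this commit; it needs a valid GEMINI_API_KEY and the preview model name may change), the same Anthropic-style `thinking` parameter can be passed directly to `litellm.completion`:

# Illustrative usage sketch, not part of this commit: with the fix,
# budget_tokens=0 is translated into Gemini's thinkingConfig with
# thinkingBudget=0 instead of being dropped from the request.
import litellm

response = litellm.completion(
    model="gemini/gemini-2.5-flash-preview-04-17",
    messages=[{"role": "user", "content": "Explain Occam's Razor with an everyday example."}],
    thinking={"type": "enabled", "budget_tokens": 0},  # 0 = no thinking tokens
)
print(response.choices[0].message.content)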