diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index d4c74f4910..c2f04edae0 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -390,7 +390,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         params: GeminiThinkingConfig = {}
         if thinking_enabled:
             params["includeThoughts"] = True
-        if thinking_budget:
+        if isinstance(thinking_budget, int):
             params["thinkingBudget"] = thinking_budget
 
         return params
diff --git a/tests/llm_translation/test_gemini.py b/tests/llm_translation/test_gemini.py
index 35aa22722e..475c4f03b7 100644
--- a/tests/llm_translation/test_gemini.py
+++ b/tests/llm_translation/test_gemini.py
@@ -116,4 +116,22 @@ def test_gemini_thinking():
         messages=messages, # make sure call works
     )
     print(response.choices[0].message)
-    assert response.choices[0].message.content is not None
\ No newline at end of file
+    assert response.choices[0].message.content is not None
+
+
+def test_gemini_thinking_budget_0():
+    litellm._turn_on_debug()
+    from litellm.types.utils import CallTypes
+    from litellm.utils import return_raw_request
+    import json
+
+    raw_request = return_raw_request(
+        endpoint=CallTypes.completion,
+        kwargs={
+            "model": "gemini/gemini-2.5-flash-preview-04-17",
+            "messages": [{"role": "user", "content": "Explain the concept of Occam's Razor and provide a simple, everyday example"}],
+            "thinking": {"type": "enabled", "budget_tokens": 0},
+        },
+    )
+    print(raw_request)
+    assert '"thinkingBudget": 0' in json.dumps(raw_request["raw_request_body"])