Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 09:53:45 +00:00
fix: Avoid BadRequestError due to invalid max_tokens (#3667)
This patch ensures that if max_tokens is not defined, it is set to None instead of 0 when calling openai_chat_completion. This way, providers (such as Gemini) that cannot handle `max_tokens = 0` will not fail.

Issue: #3666
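A minimal sketch of the behavior described above, not the actual llama-stack patch; the helper name `resolve_max_tokens` and the `SamplingParams` shape are assumptions for illustration only.

```python
from dataclasses import dataclass


@dataclass
class SamplingParams:
    # Illustrative stand-in for the request's sampling parameters.
    temperature: float = 0.0
    max_tokens: int = 0  # 0 means "not defined" in this sketch


def resolve_max_tokens(params: SamplingParams | None) -> int | None:
    """Map an unset or zero max_tokens to None so providers that reject
    max_tokens=0 (e.g. Gemini) receive no limit instead of an invalid one."""
    if params is None or not params.max_tokens:
        return None
    return params.max_tokens


# The resolved value is what would be forwarded to openai_chat_completion.
assert resolve_max_tokens(SamplingParams(max_tokens=0)) is None
assert resolve_max_tokens(SamplingParams(max_tokens=512)) == 512
```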
parent 00d8414597
commit f18b5eb537

171 changed files with 12728 additions and 8 deletions
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
             "model": text_model_id,
             "sampling_params": {
                 "temperature": 0.0,
+                "max_tokens": 512,
             },
         },
     },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
             "model": text_model_id,
             "sampling_params": {
                 "temperature": 0.0,
+                "max_tokens": 512,
             },
         },
     },