Support max_completion_tokens on Mistral (#9589)

* Support max_completion_tokens on Mistral

* test fix
2025-04-27 11:43:54 +00:00 · 2025-03-27 20:27:19 -04:00 · 2025-03-27 20:27:19 -04:00 · fef5d23dd5
commit fef5d23dd5
parent fb83567a03
2 changed files with 53 additions and 1 deletions
--- a/litellm/llms/mistral/mistral_chat_transformation.py
+++ b/litellm/llms/mistral/mistral_chat_transformation.py
@@ -28,7 +28,9 @@ class MistralConfig(OpenAIGPTConfig):

    - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling. API Default - 1.

-    - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. API Default - null.
+    - `max_tokens` [DEPRECATED - use max_completion_tokens] (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. API Default - null.
+
+    - `max_completion_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the chat completion. It is mapped onto the `max_tokens` request parameter. API Default - null.

    - `tools` (list or null): A list of available tools for the model. Use this to specify functions for which the model can generate JSON inputs.

@@ -46,6 +48,7 @@ class MistralConfig(OpenAIGPTConfig):
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    max_tokens: Optional[int] = None
+    max_completion_tokens: Optional[int] = None
    tools: Optional[list] = None
    tool_choice: Optional[Literal["auto", "any", "none"]] = None
    random_seed: Optional[int] = None
@@ -58,6 +61,7 @@ class MistralConfig(OpenAIGPTConfig):
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        max_tokens: Optional[int] = None,
+        max_completion_tokens: Optional[int] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Literal["auto", "any", "none"]] = None,
        random_seed: Optional[int] = None,
@@ -80,6 +84,7 @@ class MistralConfig(OpenAIGPTConfig):
            "temperature",
            "top_p",
            "max_tokens",
+            "max_completion_tokens",
            "tools",
            "tool_choice",
            "seed",
@@ -105,6 +110,8 @@ class MistralConfig(OpenAIGPTConfig):
        for param, value in non_default_params.items():
            if param == "max_tokens":
                optional_params["max_tokens"] = value
+            if param == "max_completion_tokens": # intended to take priority; NOTE(review): it only overrides max_tokens when iterated after it in non_default_params
+                optional_params["max_tokens"] = value
            if param == "tools":
                optional_params["tools"] = value
            if param == "stream" and value is True:
--- a/tests/litellm/llms/mistral/test_mistral_transformation.py
+++ b/tests/litellm/llms/mistral/test_mistral_transformation.py
@@ -0,0 +1,45 @@
+import os
+import sys
+from unittest.mock import MagicMock
+
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.mistral.mistral_chat_transformation import MistralConfig
+
+
+class TestMistralTransform:
+    def setup_method(self):
+        self.config = MistralConfig()
+        self.model = "mistral-small-latest"
+        self.logging_obj = MagicMock()
+
+    def test_map_mistral_params(self):
+        """Test that parameters are correctly mapped"""
+        test_params = {"temperature": 0.7, "max_tokens": 200, "max_completion_tokens": 256}
+
+        result = self.config.map_openai_params(
+            non_default_params=test_params,
+            optional_params={},
+            model=self.model,
+            drop_params=False,
+        )
+
+        # The function should properly map max_completion_tokens to max_tokens and override max_tokens
+        assert result == {"temperature": 0.7, "max_tokens": 256}
+
+    def test_mistral_max_tokens_backward_compat(self):
+        """Test that max_tokens is still mapped when max_completion_tokens is not provided"""
+        test_params = {"temperature": 0.7, "max_tokens": 200,}
+
+        result = self.config.map_openai_params(
+            non_default_params=test_params,
+            optional_params={},
+            model=self.model,
+            drop_params=False,
+        )
+
+        # The function should properly map max_tokens if max_completion_tokens is not provided
+        assert result == {"temperature": 0.7, "max_tokens": 200}