forked from phoenix/litellm-mirror
fix nvidia nim param mapping
This commit is contained in:
parent c99a5a58ec
commit e5c49548ea
3 changed files with 77 additions and 14 deletions
@@ -58,21 +58,80 @@ class NvidiaNimConfig:
             and v is not None
         }
 
-    def get_supported_openai_params(self):
-        return [
-            "stream",
-            "temperature",
-            "top_p",
-            "frequency_penalty",
-            "presence_penalty",
-            "max_tokens",
-            "stop",
-        ]
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+
+
+        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
+        """
+        if model in [
+            "google/recurrentgemma-2b",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "gemma-2-9b-it",
+        ]:
+            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
+        elif model == "nvidia/nemotron-4-340b-instruct":
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "max_tokens",
+            ]
+        elif model == "nvidia/nemotron-4-340b-reward":
+            return [
+                "stream",
+            ]
+        elif model in ["google/codegemma-1.1-7b"]:
+            # most params - but no 'seed' :(
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "stop",
+            ]
+        else:
+            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
+            # "upstage/solar-10.7b-instruct",
+            # "snowflake/arctic",
+            # "seallms/seallm-7b-v2.5",
+            # "nvidia/llama3-chatqa-1.5-8b",
+            # "nvidia/llama3-chatqa-1.5-70b",
+            # "mistralai/mistral-large",
+            # "mistralai/mixtral-8x22b-instruct-v0.1",
+            # "mistralai/mixtral-8x7b-instruct-v0.1",
+            # "mistralai/mistral-7b-instruct-v0.3",
+            # "mistralai/mistral-7b-instruct-v0.2",
+            # "mistralai/codestral-22b-instruct-v0.1",
+            # "microsoft/phi-3-small-8k-instruct",
+            # "microsoft/phi-3-small-128k-instruct",
+            # "microsoft/phi-3-mini-4k-instruct",
+            # "microsoft/phi-3-mini-128k-instruct",
+            # "microsoft/phi-3-medium-4k-instruct",
+            # "microsoft/phi-3-medium-128k-instruct",
+            # "meta/llama3-70b-instruct",
+            # "meta/llama3-8b-instruct",
+            # "meta/llama2-70b",
+            # "meta/codellama-70b",
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "stop",
+                "seed",
+            ]
 
     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict
+        self, model: str, non_default_params: dict, optional_params: dict
     ) -> dict:
-        supported_openai_params = self.get_supported_openai_params()
+        supported_openai_params = self.get_supported_openai_params(model=model)
         for param, value in non_default_params.items():
             if param in supported_openai_params:
                 optional_params[param] = value
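After this hunk, the supported-parameter list is resolved per model instead of once for the whole provider. A minimal sketch of the new behavior, assuming litellm is installed with this commit applied (model names are taken from the branches above, and map_openai_params is assumed to return the updated dict, as the utils.py call site below implies):

import litellm

config = litellm.NvidiaNimConfig()

# Gemma-family models accept "seed" but not the penalty params.
print(config.get_supported_openai_params(model="google/gemma-2-9b-it"))
# ['stream', 'temperature', 'top_p', 'max_tokens', 'stop', 'seed']

# map_openai_params() filters against the per-model list, so an
# unsupported param like presence_penalty is dropped rather than
# forwarded to nemotron-4-340b-instruct.
mapped = config.map_openai_params(
    model="nvidia/nemotron-4-340b-instruct",
    non_default_params={"temperature": 0.2, "presence_penalty": 0.5},
    optional_params={},
)
print(mapped)  # {'temperature': 0.2}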
@@ -3602,6 +3602,8 @@ def test_completion_nvidia_nim():
                     "content": "What's the weather like in Boston today in Fahrenheit?",
                 }
             ],
+            presence_penalty=0.5,
+            frequency_penalty=0.1,
         )
         # Add any assertions here to check the response
         print(response)
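The test now pushes presence_penalty and frequency_penalty through the public completion() API. A hedged end-to-end sketch of the same call path; the model name is illustrative and credentials for the nvidia_nim provider are assumed to be configured:

import litellm

# Both penalty params fall into the DEFAULT case above, so after this
# commit they are forwarded to the NIM endpoint instead of being dropped.
response = litellm.completion(
    model="nvidia_nim/meta/llama3-8b-instruct",  # illustrative model id
    messages=[
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ],
    presence_penalty=0.5,
    frequency_penalty=0.1,
)
print(response)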
@@ -3184,7 +3184,9 @@ def get_optional_params(
         )
         _check_valid_arg(supported_params=supported_params)
         optional_params = litellm.NvidiaNimConfig().map_openai_params(
-            non_default_params=non_default_params, optional_params=optional_params
+            model=model,
+            non_default_params=non_default_params,
+            optional_params=optional_params,
         )
     elif custom_llm_provider == "fireworks_ai":
         supported_params = get_supported_openai_params(
@@ -3776,7 +3778,7 @@ def get_supported_openai_params(
     elif custom_llm_provider == "fireworks_ai":
         return litellm.FireworksAIConfig().get_supported_openai_params()
     elif custom_llm_provider == "nvidia_nim":
-        return litellm.NvidiaNimConfig().get_supported_openai_params()
+        return litellm.NvidiaNimConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "volcengine":
         return litellm.VolcEngineConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "groq":
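With model forwarded here as well, the top-level helper reflects the per-model tables. A small sketch, assuming get_supported_openai_params is importable from the litellm package as it is used within the codebase:

from litellm import get_supported_openai_params

# The reward model only supports streaming, per the mapping above.
params = get_supported_openai_params(
    model="nvidia/nemotron-4-340b-reward",
    custom_llm_provider="nvidia_nim",
)
print(params)  # ['stream']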