diff --git a/litellm/llms/nvidia_nim.py b/litellm/llms/nvidia_nim.py
index ebcc84c13..6d2e4316b 100644
--- a/litellm/llms/nvidia_nim.py
+++ b/litellm/llms/nvidia_nim.py
@@ -58,21 +58,80 @@ class NvidiaNimConfig:
             and v is not None
         }

-    def get_supported_openai_params(self):
-        return [
-            "stream",
-            "temperature",
-            "top_p",
-            "frequency_penalty",
-            "presence_penalty",
-            "max_tokens",
-            "stop",
-        ]
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+
+
+        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
+        """
+        if model in [
+            "google/recurrentgemma-2b",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "gemma-2-9b-it",
+        ]:
+            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
+        elif model == "nvidia/nemotron-4-340b-instruct":
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "max_tokens",
+            ]
+        elif model == "nvidia/nemotron-4-340b-reward":
+            return [
+                "stream",
+            ]
+        elif model in ["google/codegemma-1.1-7b"]:
+            # most params - but no 'seed' :(
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "stop",
+            ]
+        else:
+            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
+            # "upstage/solar-10.7b-instruct",
+            # "snowflake/arctic",
+            # "seallms/seallm-7b-v2.5",
+            # "nvidia/llama3-chatqa-1.5-8b",
+            # "nvidia/llama3-chatqa-1.5-70b",
+            # "mistralai/mistral-large",
+            # "mistralai/mixtral-8x22b-instruct-v0.1",
+            # "mistralai/mixtral-8x7b-instruct-v0.1",
+            # "mistralai/mistral-7b-instruct-v0.3",
+            # "mistralai/mistral-7b-instruct-v0.2",
+            # "mistralai/codestral-22b-instruct-v0.1",
+            # "microsoft/phi-3-small-8k-instruct",
+            # "microsoft/phi-3-small-128k-instruct",
+            # "microsoft/phi-3-mini-4k-instruct",
+            # "microsoft/phi-3-mini-128k-instruct",
+            # "microsoft/phi-3-medium-4k-instruct",
+            # "microsoft/phi-3-medium-128k-instruct",
+            # "meta/llama3-70b-instruct",
+            # "meta/llama3-8b-instruct",
+            # "meta/llama2-70b",
+            # "meta/codellama-70b",
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "stop",
+                "seed",
+            ]

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict
+        self, model: str, non_default_params: dict, optional_params: dict
     ) -> dict:
-        supported_openai_params = self.get_supported_openai_params()
+        supported_openai_params = self.get_supported_openai_params(model=model)
         for param, value in non_default_params.items():
             if param in supported_openai_params:
                 optional_params[param] = value
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 40c15d06d..0598c52df 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -3602,6 +3602,8 @@ def test_completion_nvidia_nim():
                 "content": "What's the weather like in Boston today in Fahrenheit?",
             }
         ],
+        presence_penalty=0.5,
+        frequency_penalty=0.1,
     )
     # Add any assertions here to check the response
     print(response)
diff --git a/litellm/utils.py b/litellm/utils.py
index 1010beb96..a28531b6c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3184,7 +3184,9 @@ def get_optional_params(
         )
         _check_valid_arg(supported_params=supported_params)
         optional_params = litellm.NvidiaNimConfig().map_openai_params(
-            non_default_params=non_default_params, optional_params=optional_params
+            model=model,
+            non_default_params=non_default_params,
+            optional_params=optional_params,
         )
     elif custom_llm_provider == "fireworks_ai":
         supported_params = get_supported_openai_params(
@@ -3776,7 +3778,7 @@ def get_supported_openai_params(
     elif custom_llm_provider == "fireworks_ai":
         return litellm.FireworksAIConfig().get_supported_openai_params()
     elif custom_llm_provider == "nvidia_nim":
-        return litellm.NvidiaNimConfig().get_supported_openai_params()
+        return litellm.NvidiaNimConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "volcengine":
         return litellm.VolcEngineConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "groq":
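
Usage sketch (not part of the diff): a minimal illustration of how the model-aware NvidiaNimConfig added above is expected to behave. The model names and parameter lists are taken directly from the branches in nvidia_nim.py; the assertions are assumptions about the resulting behavior, not tests included in this change.

# Sketch only - assumed behavior based on the NvidiaNimConfig branches above.
import litellm

config = litellm.NvidiaNimConfig()

# Gemma-family models keep "seed" but drop the penalty params.
gemma_params = config.get_supported_openai_params(model="google/gemma-2-9b-it")
assert "seed" in gemma_params
assert "presence_penalty" not in gemma_params

# map_openai_params() copies only params the target model supports into
# optional_params, silently dropping the rest.
opt: dict = {}
config.map_openai_params(
    model="nvidia/nemotron-4-340b-instruct",
    non_default_params={"temperature": 0.2, "presence_penalty": 0.5},
    optional_params=opt,
)
assert opt == {"temperature": 0.2}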