litellm-mirror/litellm/llms/nvidia_nim.py
Ishaan Jaff c93364ab2e [Feat] Add max_completion_tokens param (#5691)
* add max_completion_tokens

* add max_completion_tokens

* add max_completion_tokens support for OpenAI models

* add max_completion_tokens param

* add max_completion_tokens for bedrock converse models

* add test for converse maxTokens

* fix openai o1 param mapping test

* move test optional params

* add max_completion_tokens for anthropic api

* fix conftest

* add max_completion tokens for vertex ai partner models

* add max_completion_tokens for fireworks ai

* add max_completion_tokens for hf rest api

* add test for param mapping

* add param mapping for vertex, gemini + testing

* predibase is the most unstable and unusable llm api in prod, can't handle our ci/cd

* add max_completion_tokens to openai supported params

* fix fireworks ai param mapping
2024-09-14 14:57:01 -07:00

143 lines
4.7 KiB
Python

"""
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
This is OpenAI compatible
This file only contains param mapping logic
API calling is done using the OpenAI SDK with an api_base
"""
import types
from typing import Optional, Union
class NvidiaNimConfig:
"""
Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
"""
temperature: Optional[int] = None
top_p: Optional[int] = None
frequency_penalty: Optional[int] = None
presence_penalty: Optional[int] = None
max_tokens: Optional[int] = None
stop: Optional[Union[str, list]] = None
def __init__(
self,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
frequency_penalty: Optional[int] = None,
presence_penalty: Optional[int] = None,
max_tokens: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self, model: str) -> list:
"""
Get the supported OpenAI params for the given model
Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
"""
if model in [
"google/recurrentgemma-2b",
"google/gemma-2-27b-it",
"google/gemma-2-9b-it",
"gemma-2-9b-it",
]:
return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
elif model == "nvidia/nemotron-4-340b-instruct":
return [
"stream",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
]
elif model == "nvidia/nemotron-4-340b-reward":
return [
"stream",
]
elif model in ["google/codegemma-1.1-7b"]:
# most params - but no 'seed' :(
return [
"stream",
"temperature",
"top_p",
"frequency_penalty",
"presence_penalty",
"max_tokens",
"max_completion_tokens",
"stop",
]
else:
# DEFAULT Case - The vast majority of Nvidia NIM Models lie here
# "upstage/solar-10.7b-instruct",
# "snowflake/arctic",
# "seallms/seallm-7b-v2.5",
# "nvidia/llama3-chatqa-1.5-8b",
# "nvidia/llama3-chatqa-1.5-70b",
# "mistralai/mistral-large",
# "mistralai/mixtral-8x22b-instruct-v0.1",
# "mistralai/mixtral-8x7b-instruct-v0.1",
# "mistralai/mistral-7b-instruct-v0.3",
# "mistralai/mistral-7b-instruct-v0.2",
# "mistralai/codestral-22b-instruct-v0.1",
# "microsoft/phi-3-small-8k-instruct",
# "microsoft/phi-3-small-128k-instruct",
# "microsoft/phi-3-mini-4k-instruct",
# "microsoft/phi-3-mini-128k-instruct",
# "microsoft/phi-3-medium-4k-instruct",
# "microsoft/phi-3-medium-128k-instruct",
# "meta/llama3-70b-instruct",
# "meta/llama3-8b-instruct",
# "meta/llama2-70b",
# "meta/codellama-70b",
return [
"stream",
"temperature",
"top_p",
"frequency_penalty",
"presence_penalty",
"max_tokens",
"max_completion_tokens",
"stop",
"seed",
]
def map_openai_params(
self, model: str, non_default_params: dict, optional_params: dict
) -> dict:
supported_openai_params = self.get_supported_openai_params(model=model)
for param, value in non_default_params.items():
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
elif param in supported_openai_params:
optional_params[param] = value
return optional_params