mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
* remove unused imports * fix AmazonConverseConfig * fix test * fix import * ruff check fixes * test fixes * fix testing * fix imports
134 lines
4.5 KiB
Python
134 lines
4.5 KiB
Python
"""
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

This is OpenAI compatible.

This file only contains param mapping logic.

API calling is done using the OpenAI SDK with an api_base.
"""
|
|
|
|
from typing import Optional, Union
|
|
|
|
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
|
|
|
|
|
|
class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:

    - `temperature` (float, optional)
    - `top_p` (float, optional)
    - `frequency_penalty` (float, optional)
    - `presence_penalty` (float, optional)
    - `max_tokens` (int, optional)
    - `stop` (str or list, optional)

    Every parameter defaults to `None`, meaning "not configured".
    """

    # The sampling knobs are fractional values in the OpenAI-compatible API,
    # so they are annotated as float (plain ints remain acceptable).
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        """
        Record every explicitly provided (non-None) argument as a default.

        NOTE: values are stored on the class, not on the instance, so they
        are shared by all instances of this config class.
        """
        # Snapshot the arguments first so the loop variables created below
        # are not part of the mapping being iterated.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the configuration as produced by the parent class."""
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model

        Args:
            model: Nvidia NIM model name, e.g. "google/gemma-2-9b-it".

        Returns:
            A new list with the names of the OpenAI params accepted for
            `model`. Models without a dedicated entry get the default list.

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
        """
        if model in (
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ):
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
        elif model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]
        elif model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]
        elif model == "google/codegemma-1.1-7b":
            # most params - but no 'seed' :(
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]
        else:
            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
            # "upstage/solar-10.7b-instruct",
            # "snowflake/arctic",
            # "seallms/seallm-7b-v2.5",
            # "nvidia/llama3-chatqa-1.5-8b",
            # "nvidia/llama3-chatqa-1.5-70b",
            # "mistralai/mistral-large",
            # "mistralai/mixtral-8x22b-instruct-v0.1",
            # "mistralai/mixtral-8x7b-instruct-v0.1",
            # "mistralai/mistral-7b-instruct-v0.3",
            # "mistralai/mistral-7b-instruct-v0.2",
            # "mistralai/codestral-22b-instruct-v0.1",
            # "microsoft/phi-3-small-8k-instruct",
            # "microsoft/phi-3-small-128k-instruct",
            # "microsoft/phi-3-mini-4k-instruct",
            # "microsoft/phi-3-mini-128k-instruct",
            # "microsoft/phi-3-medium-4k-instruct",
            # "microsoft/phi-3-medium-128k-instruct",
            # "meta/llama3-70b-instruct",
            # "meta/llama3-8b-instruct",
            # "meta/llama2-70b",
            # "meta/codellama-70b",
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
                "seed",
            ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy the supported OpenAI params for `model` into `optional_params`.

        Args:
            non_default_params: OpenAI-style params supplied by the caller.
            optional_params: Dict that receives the mapped params; it is
                mutated in place.
            model: Nvidia NIM model name used to look up supported params.
            drop_params: Accepted for interface compatibility; this method
                does not consult it. Unsupported params are skipped.

        Returns:
            The same `optional_params` dict, after the update.
        """
        supported_openai_params = self.get_supported_openai_params(model=model)
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                # `max_completion_tokens` is sent as `max_tokens`. Only do so
                # when the model accepts `max_tokens`, so a token limit is
                # never forwarded to a model that does not support one.
                if "max_tokens" in supported_openai_params:
                    optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
|