Clean up instructions and implementation; reorganize notebooks

Jash Gulabrai 2025-04-18 16:27:19 -04:00
parent 0d9d333a4e
commit 4131e8146f
29 changed files with 2756 additions and 89 deletions


@@ -86,7 +86,7 @@ class NVIDIAEvalImpl(
if benchmark_config.eval_candidate.type == "model"
else benchmark_config.eval_candidate.config.model
)
nvidia_model = self.get_provider_model_id(model)
nvidia_model = self.get_provider_model_id(model) or model
result = await self._evaluator_post(
"/v1/evaluation/jobs",


@@ -47,10 +47,15 @@ class NVIDIAConfig(BaseModel):
default=60,
description="Timeout for the HTTP requests",
)
append_api_version: bool = Field(
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
)
@classmethod
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
return {
"url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
"api_key": "${env.NVIDIA_API_KEY:}",
"append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}",
}
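
A minimal sketch of how the new append_api_version default resolves from the environment, mirroring the default_factory above; only the literal string "false" (any casing) disables the suffix:

    import os

    def resolve_append_api_version() -> bool:
        # Mirrors the default_factory: only an explicit "false" turns the flag off.
        return os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false"

    # NVIDIA_APPEND_API_VERSION unset     -> True  (append /v1 to base_url)
    # NVIDIA_APPEND_API_VERSION="False"   -> False (use base_url as-is)
    # NVIDIA_APPEND_API_VERSION="0"       -> True  (only "false" disables it)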


@@ -42,10 +42,7 @@ from llama_stack.apis.inference.inference import (
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import Model, ModelType
from llama_stack.models.llama.datatypes import (
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
)
@@ -126,15 +123,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
"meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct",
}
# add /v1 in case of hosted models
base_url = self._config.url
if _is_nvidia_hosted(self._config):
if provider_model_id in special_model_urls:
base_url = special_model_urls[provider_model_id]
else:
base_url = f"{self._config.url}/v1"
elif "nim.int.aire.nvidia.com" in base_url:
base_url = f"{base_url}/v1"
base_url = f"{self._config.url}/v1" if self._config.append_api_version else self._config.url
if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls:
base_url = special_model_urls[provider_model_id]
return _get_client_for_base_url(base_url)
async def completion(
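
A minimal sketch of the base_url selection implemented in the hunk above, with hypothetical inputs; the /v1 suffix is now governed solely by append_api_version, while hosted "special" models keep their dedicated endpoints:

    def resolve_base_url(url, append_api_version, hosted, provider_model_id, special_model_urls):
        # Append /v1 only when the config asks for it ...
        base_url = f"{url}/v1" if append_api_version else url
        # ... but hosted special models always use their dedicated endpoint.
        if hosted and provider_model_id in special_model_urls:
            base_url = special_model_urls[provider_model_id]
        return base_url

    # Hypothetical examples:
    # resolve_base_url("https://integrate.api.nvidia.com", True, True, "meta/llama-3.1-8b-instruct", {})
    #   -> "https://integrate.api.nvidia.com/v1"
    # resolve_base_url("http://localhost:8000", False, False, "meta/llama-3.1-8b-instruct", {})
    #   -> "http://localhost:8000"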
@@ -258,9 +250,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
# await check_health(self._config) # this raises errors
provider_model_id = self.get_provider_model_id(model_id)
print(f"provider_model_id: {provider_model_id}")
request = await convert_chat_completion_request(
request=ChatCompletionRequest(
model=provider_model_id,
model=self.get_provider_model_id(model_id),
messages=messages,
sampling_params=sampling_params,
response_format=response_format,


@@ -392,14 +392,15 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
# Handle LoRA-specific configuration
if algorithm_config:
if algorithm_config.get("type") == "LoRA":
warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config")
algorithm_config_dict = algorithm_config.model_dump()
if algorithm_config_dict.get("type") == "LoRA":
warn_unsupported_params(algorithm_config_dict, supported_params["lora_config"], "LoRA config")
job_config["hyperparameters"]["lora"] = {
k: v
for k, v in {
"adapter_dim": algorithm_config.get("adapter_dim"),
"alpha": algorithm_config.get("alpha"),
"adapter_dropout": algorithm_config.get("adapter_dropout"),
"adapter_dim": algorithm_config_dict.get("adapter_dim"),
"alpha": algorithm_config_dict.get("alpha"),
"adapter_dropout": algorithm_config_dict.get("adapter_dropout"),
}.items()
if v is not None
}
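
The switch from algorithm_config.get(...) to model_dump() reflects that the config arrives as a Pydantic model, which has no .get() method. A minimal sketch with a hypothetical LoRA config model showing the dict the hunk above consumes:

    from typing import Optional
    from pydantic import BaseModel

    class LoRAConfig(BaseModel):  # hypothetical stand-in for the real algorithm config type
        type: str = "LoRA"
        adapter_dim: Optional[int] = None
        alpha: Optional[int] = None
        adapter_dropout: Optional[float] = None

    algorithm_config_dict = LoRAConfig(adapter_dim=16, alpha=16).model_dump()
    # {'type': 'LoRA', 'adapter_dim': 16, 'alpha': 16, 'adapter_dropout': None}
    # None-valued entries are then filtered out before being sent as hyperparameters["lora"].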


@@ -25,13 +25,16 @@ class NVIDIASafetyConfig(BaseModel):
guardrails_service_url: str = Field(
default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"),
description="The url for accessing the guardrails service",
description="The url for accessing the Guardrails service",
)
config_id: Optional[str] = Field(
default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check"),
description="Guardrails configuration ID to use from the Guardrails configuration store",
)
config_id: Optional[str] = Field(default="self-check", description="Config ID to use from the config store")
@classmethod
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
return {
"guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
"config_id": "self-check",
"config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}",
}


@@ -12,8 +12,8 @@ import requests
from llama_stack.apis.inference import Message
from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
from llama_stack.apis.shields import Shield
from llama_stack.distribution.library_client import convert_pydantic_to_json_value
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
from .config import NVIDIASafetyConfig
@@ -28,7 +28,6 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
Args:
config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID.
"""
print(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...")
self.config = config
async def initialize(self) -> None:
@@ -127,9 +126,10 @@ class NeMoGuardrails:
Raises:
requests.HTTPError: If the POST request fails.
"""
messages = [await convert_message_to_openai_dict_new(message) for message in messages]
request_data = {
"model": self.model,
"messages": convert_pydantic_to_json_value(messages),
"messages": messages,
"temperature": self.temperature,
"top_p": 1,
"frequency_penalty": 0,
@@ -140,6 +140,8 @@
"config_id": self.config_id,
},
}
print("request_data")
print(request_data)
response = await self._guardrails_post(path="/v1/guardrail/checks", data=request_data)
if response["status"] == "blocked":
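
A partial sketch of the payload assembled above, assuming a single user message; the simplified message dict stands in for whatever convert_message_to_openai_dict_new actually returns, and nesting config_id under a "guardrails" key is an assumption since the hunk does not show the enclosing key:

    request_data = {
        "model": "meta/llama-3.1-8b-instruct",  # hypothetical shield model id
        "messages": [{"role": "user", "content": "text to screen"}],
        "temperature": 1.0,  # hypothetical default
        "top_p": 1,
        "frequency_penalty": 0,
        "guardrails": {"config_id": "self-check"},  # nesting key assumed
    }
    # POSTed to {guardrails_service_url}/v1/guardrail/checks; a "blocked" status is treated as a violation.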


@@ -65,7 +65,7 @@ def get_distribution_template() -> DistributionTemplate:
default_models = get_model_registry(available_models)
return DistributionTemplate(
name="nvidia",
distro_type="remote_hosted",
distro_type="self_hosted",
description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
@@ -103,6 +103,10 @@ def get_distribution_template() -> DistributionTemplate:
"llama-stack-user",
"NVIDIA User ID",
),
"NVIDIA_APPEND_API_VERSION": (
"True",
"Whether to append the API version to the base_url",
),
"NVIDIA_DATASET_NAMESPACE": (
"default",
"NVIDIA Dataset Namespace",
@@ -127,6 +131,10 @@ def get_distribution_template() -> DistributionTemplate:
"http://0.0.0.0:7331",
"URL for the NeMo Guardrails Service",
),
"NVIDIA_GUARDRAILS_CONFIG_ID": (
"self-check",
"NVIDIA Guardrail Configuration ID",
),
"NVIDIA_EVALUATOR_URL": (
"http://0.0.0.0:7331",
"URL for the NeMo Evaluator Service",


@@ -18,11 +18,12 @@ providers:
config:
url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
- provider_id: nvidia
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
config_id: self-check
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
@@ -36,7 +37,7 @@ providers:
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
config_id: self-check
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference


@@ -18,6 +18,7 @@ providers:
config:
url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
@@ -31,7 +32,7 @@ providers:
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
config_id: self-check
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
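
For context on the ${env.VAR:default} placeholders used throughout the run.yaml hunks above, a minimal sketch of the substitution rule as assumed here: take the environment variable if it is set, otherwise fall back to the default after the colon.

    import os
    import re

    def substitute_env(value: str) -> str:
        # Hypothetical resolver for "${env.NAME:default}" placeholders.
        pattern = re.compile(r"\$\{env\.([A-Za-z0-9_]+):([^}]*)\}")
        return pattern.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

    # substitute_env("${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}")
    #   -> "self-check" unless NVIDIA_GUARDRAILS_CONFIG_ID is set in the environment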