diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py
index 4d8538b2e..abd34b498 100644
--- a/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,7 +7,7 @@
 import os
 from typing import Any, Dict, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.schema_utils import json_schema_type
 
@@ -39,7 +39,7 @@ class NVIDIAConfig(BaseModel):
         default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
         description="A base url for accessing the NVIDIA NIM",
     )
-    api_key: Optional[str] = Field(
+    api_key: Optional[SecretStr] = Field(
         default_factory=lambda: os.getenv("NVIDIA_API_KEY"),
         description="The NVIDIA API key, only needed of using the hosted service",
     )
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 48a609b8f..db9e176ee 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -85,7 +85,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
         # make sure the client lives longer than any async calls
         self._client = AsyncOpenAI(
             base_url=f"{self._config.url}/v1",
-            api_key=(self._config.api_key if self._config.api_key else "NO KEY"),
+            api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"),
             timeout=self._config.timeout,
         )
 
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index db5c4270a..308c0e2a6 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -6,14 +6,11 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/tests/api/fixtures/recorded_responses/chat_completion.json b/tests/api/fixtures/recorded_responses/chat_completion.json
index 6562d4a5c..6f2973ffc 100644
--- a/tests/api/fixtures/recorded_responses/chat_completion.json
+++ b/tests/api/fixtures/recorded_responses/chat_completion.json
@@ -10630,4 +10630,4 @@
     ],
     "type": "generator"
   }
-}
\ No newline at end of file
+}
diff --git a/tests/api/fixtures/recorded_responses/invoke_tool.json b/tests/api/fixtures/recorded_responses/invoke_tool.json
index 1559ad8e6..b6300f7e3 100644
--- a/tests/api/fixtures/recorded_responses/invoke_tool.json
+++ b/tests/api/fixtures/recorded_responses/invoke_tool.json
@@ -290,4 +290,4 @@
       "metadata": null
     }
   }
-}
\ No newline at end of file
+}
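
Why the adapter now calls `get_secret_value()`: pydantic's `SecretStr` masks the wrapped value in `str()`/`repr()` output (and therefore in logs and dumped configs), so the plaintext has to be unwrapped explicitly before it is handed to `AsyncOpenAI`. A minimal sketch of that behavior, using an illustrative `DemoConfig` as a stand-in for the real `NVIDIAConfig`:

```python
# Illustrative sketch only -- DemoConfig is a hypothetical stand-in,
# not the NVIDIAConfig class touched by this patch.
from typing import Optional

from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):
    api_key: Optional[SecretStr] = None


# pydantic coerces a plain string into SecretStr on validation
config = DemoConfig(api_key="nvapi-example-token")

# str()/repr() keep the secret masked, so accidental logging is safe
print(config.api_key)                     # **********

# the plaintext must be unwrapped explicitly, as nvidia.py now does
print(config.api_key.get_secret_value())  # nvapi-example-token
```

This is also why the `NO KEY` fallback in `nvidia.py` is untouched: when `api_key` is unset there is nothing to unwrap, and the placeholder string is passed through as before.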