diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index c27bea1a2..8f80408d4 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -48,8 +48,8 @@ class NVIDIAConfig(BaseModel): description="Timeout for the HTTP requests", ) append_api_version: bool = Field( - default=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", True), - description="Whether to append the API version to the model ID", + default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false", + description="When set to false, the API version will not be appended to the base_url. By default, it is true.", ) @classmethod