From 1d960bacdc39609b91fa7f771692127eab77d1cf Mon Sep 17 00:00:00 2001 From: Rashmi Pawar Date: Wed, 16 Apr 2025 14:33:41 +0530 Subject: [PATCH] add version env variable --- .../distributions/remote_hosted_distro/nvidia.md | 3 +-- .../distributions/self_hosted_distro/nvidia.md | 2 -- .../providers/remote/inference/nvidia/config.py | 5 +++++ .../providers/remote/inference/nvidia/nvidia.py | 11 ++++------- .../providers/remote/post_training/nvidia/README.md | 1 - llama_stack/templates/nvidia/nvidia.py | 12 ++++-------- llama_stack/templates/nvidia/run-with-safety.yaml | 1 + llama_stack/templates/nvidia/run.yaml | 1 + 8 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 58731392d..c470faea7 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -22,9 +22,8 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`) +- `NVIDIA_APPEND_API_VERSION`: Whether to append the API version to the base_url (default: `True`) - `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`) - `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) - `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) - `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 58731392d..7aca03bac 100644 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -22,9 +22,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`) - `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`) - `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) - `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) - `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index abd34b498..c27bea1a2 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -47,10 +47,15 @@ class NVIDIAConfig(BaseModel): default=60, description="Timeout for the HTTP requests", ) + append_api_version: bool = Field( + default=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", True), + description="Whether to append the API version to the model ID", + ) @classmethod def sample_run_config(cls, **kwargs) -> Dict[str, Any]: return { "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", "api_key": "${env.NVIDIA_API_KEY:}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", } diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index a0caac737..c13be2e25 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -120,13 +120,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): "meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct", } - # add /v1 in case of hosted models - base_url = self._config.url - if _is_nvidia_hosted(self._config): - if provider_model_id in special_model_urls: - base_url = special_model_urls[provider_model_id] - else: - base_url = f"{self._config.url}/v1" + base_url = f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + + if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls: + base_url = special_model_urls[provider_model_id] return _get_client_for_base_url(base_url) async def completion( diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 13164667f..3ef538d29 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -36,7 +36,6 @@ import os os.environ["NVIDIA_API_KEY"] = "your-api-key" os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" -os.environ["NVIDIA_USER_ID"] = "llama-stack-user" os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" os.environ["NVIDIA_PROJECT_ID"] = "test-project" os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 3b0cbe1e5..b7fb7d453 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -90,19 +90,15 @@ def get_distribution_template() -> DistributionTemplate: "", "NVIDIA API Key", ), - ## Nemo Customizer related variables - "NVIDIA_USER_ID": ( - "llama-stack-user", - "NVIDIA User ID", + "NVIDIA_APPEND_API_VERSION": ( + "True", + "Whether to append the API version to the base_url", ), + ## Nemo Customizer related variables "NVIDIA_DATASET_NAMESPACE": ( "default", "NVIDIA Dataset Namespace", ), - "NVIDIA_ACCESS_POLICIES": ( - "{}", - "NVIDIA Access Policies", - ), "NVIDIA_PROJECT_ID": ( "test-project", "NVIDIA Project ID", diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 658d9377e..2f7a62ce0 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -18,6 +18,7 @@ providers: config: url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} - provider_id: nvidia provider_type: remote::nvidia config: diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 1267a9883..c47eaa5cc 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -18,6 +18,7 @@ providers: config: url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} vector_io: - provider_id: faiss provider_type: inline::faiss