diff --git a/distributions/nutanix/compose.yaml b/distributions/nutanix/compose.yaml
new file mode 100644
index 000000000..40e87a1a2
--- /dev/null
+++ b/distributions/nutanix/compose.yaml
@@ -0,0 +1,15 @@
+services:
+  llamastack:
+    image: distribution-nutanix
+    volumes:
+      - ~/.llama:/root/.llama
+      - ./run.yaml:/root/llamastack-run-nutanix.yaml
+    ports:
+      - "5000:5000"
+    entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-nutanix.yaml"
+    deploy:
+      restart_policy:
+        condition: on-failure
+        delay: 3s
+        max_attempts: 5
+        window: 60s
diff --git a/distributions/nutanix/run.yaml b/distributions/nutanix/run.yaml
new file mode 100644
index 000000000..15f6a13a2
--- /dev/null
+++ b/distributions/nutanix/run.yaml
@@ -0,0 +1,51 @@
+version: '2'
+image_name: nutanix
+docker_image: null
+conda_env: nutanix
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: nutanix
+    provider_type: remote::nutanix
+    config:
+      url: https://ai.nutanix.com/api/v1
+      api_key: ${env.NUTANIX_API_KEY}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/registry.db
+models: []
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/providers/remote/inference/nutanix/__init__.py b/llama_stack/providers/remote/inference/nutanix/__init__.py
index 013f7d560..9b63fb6ab 100644
--- a/llama_stack/providers/remote/inference/nutanix/__init__.py
+++ b/llama_stack/providers/remote/inference/nutanix/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Nutanix, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
diff --git a/llama_stack/providers/remote/inference/nutanix/config.py b/llama_stack/providers/remote/inference/nutanix/config.py
index 6af6af3ff..2ffd31667 100644
--- a/llama_stack/providers/remote/inference/nutanix/config.py
+++ b/llama_stack/providers/remote/inference/nutanix/config.py
@@ -1,9 +1,11 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Nutanix, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from typing import Any, Dict, Optional
+
 from llama_models.schema_utils import json_schema_type
 
 from pydantic import BaseModel, Field
@@ -11,10 +13,17 @@ from pydantic import BaseModel, Field
 @json_schema_type
 class NutanixImplConfig(BaseModel):
     url: str = Field(
-        default=None,
-        description="The URL of the Nutanix AI endpoint",
+        default="https://ai.nutanix.com/api/v1",
+        description="The URL of the Nutanix AI Endpoint",
     )
-    api_token: str = Field(
+    api_key: Optional[str] = Field(
         default=None,
-        description="The API token of the Nutanix AI endpoint",
+        description="The API key for the Nutanix AI Endpoint",
     )
+
+    @classmethod
+    def sample_run_config(cls) -> Dict[str, Any]:
+        return {
+            "url": "https://ai.nutanix.com/api/v1",
+            "api_key": "${env.NUTANIX_API_KEY}",
+        }
diff --git a/llama_stack/providers/remote/inference/nutanix/nutanix.py b/llama_stack/providers/remote/inference/nutanix/nutanix.py
index 3c0782eb3..7c54940d5 100644
--- a/llama_stack/providers/remote/inference/nutanix/nutanix.py
+++ b/llama_stack/providers/remote/inference/nutanix/nutanix.py
@@ -1,4 +1,4 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Nutanix, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
@@ -30,7 +30,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 from .config import NutanixImplConfig
 
-model_aliases = [
+MODEL_ALIASES = [
     build_model_alias(
         "vllm-llama-3-1",
         CoreModelId.llama3_1_8b_instruct.value,
@@ -40,7 +40,7 @@ model_aliases = [
 
 class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: NutanixImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_aliases)
+        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
         self.config = config
         self.formatter = ChatFormat(Tokenizer.get_instance())
 
@@ -50,6 +50,20 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
     async def shutdown(self) -> None:
         pass
 
+    def _get_client(self) -> OpenAI:
+        nutanix_api_key = None
+        if self.config.api_key:
+            nutanix_api_key = self.config.api_key
+        else:
+            provider_data = self.get_request_provider_data()
+            if provider_data is None or not provider_data.nutanix_api_key:
+                raise ValueError(
+                    'Pass Nutanix API Key in the header X-LlamaStack-ProviderData as { "nutanix_api_key": <your api key> }'
+                )
+            nutanix_api_key = provider_data.nutanix_api_key
+
+        return OpenAI(base_url=self.config.url, api_key=nutanix_api_key)
+
     async def completion(
         self,
         model_id: str,
@@ -85,7 +99,7 @@ class NutanixInferenceAdapter(ModelRegistryHelper, Inference):
             logprobs=logprobs,
         )
 
-        client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
+        client = self._get_client()
         if stream:
             return self._stream_chat_completion(request, client)
         else:
diff --git a/llama_stack/templates/nutanix/__init__.py b/llama_stack/templates/nutanix/__init__.py
new file mode 100644
index 000000000..20adc3fc3
--- /dev/null
+++ b/llama_stack/templates/nutanix/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Nutanix, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .nutanix import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/nutanix/build.yaml b/llama_stack/templates/nutanix/build.yaml
index e3fcea06a..7c80b278e 100644
--- a/llama_stack/templates/nutanix/build.yaml
+++ b/llama_stack/templates/nutanix/build.yaml
@@ -3,7 +3,10 @@ distribution_spec:
   description: Use Nutanix AI Endpoint for running LLM inference
   providers:
     inference: remote::nutanix
-    memory: inline::faiss
+    memory:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
     safety: inline::llama-guard
     agents: inline::meta-reference
     telemetry: inline::meta-reference
diff --git a/distributions/nutanix/README.md b/llama_stack/templates/nutanix/doc_template.md
similarity index 88%
rename from distributions/nutanix/README.md
rename to llama_stack/templates/nutanix/doc_template.md
index fdd9e3106..e67529771 100644
--- a/distributions/nutanix/README.md
+++ b/llama_stack/templates/nutanix/doc_template.md
@@ -26,12 +26,12 @@ Please adjust the `NUTANIX_SUPPORTED_MODELS` variable at line 29 in `llama_stack
 3. Build the distribution
 ```
 pip install -e .
-llama stack build --template nutanix --name ntnx --image-type conda
+llama stack build --template nutanix --image-type conda
 ```
 
-4. Set the endpoint URL and API Key
+4. Edit the generated run.yaml file
 ```
-llama stack configure ntnx
+vim <path to your run.yaml>
 ```
 
 5. Serve and enjoy!
diff --git a/llama_stack/templates/nutanix/nutanix.py b/llama_stack/templates/nutanix/nutanix.py
new file mode 100644
index 000000000..982b4e9a6
--- /dev/null
+++ b/llama_stack/templates/nutanix/nutanix.py
@@ -0,0 +1,70 @@
+# Copyright (c) Nutanix, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_models.sku_list import all_registered_models
+
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+from llama_stack.providers.remote.inference.nutanix import NutanixImplConfig
+from llama_stack.providers.remote.inference.nutanix.nutanix import MODEL_ALIASES
+
+def get_distribution_template() -> DistributionTemplate:
+    providers = {
+        "inference": ["remote::nutanix"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+    }
+
+    inference_provider = Provider(
+        provider_id="nutanix",
+        provider_type="remote::nutanix",
+        config=NutanixImplConfig.sample_run_config(),
+    )
+
+    core_model_to_hf_repo = {
+        m.descriptor(): m.huggingface_repo for m in all_registered_models()
+    }
+    default_models = [
+        ModelInput(
+            model_id=core_model_to_hf_repo[m.llama_model],
+            provider_model_id=m.provider_model_id,
+        )
+        for m in MODEL_ALIASES
+    ]
+
+    return DistributionTemplate(
+        name="nutanix",
+        distro_type="remote::nutanix",
+        description="Use Nutanix AI Endpoint for running LLM inference",
+        docker_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        default_models=default_models,
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                },
+                default_models=default_models,
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+            ),
+        },
+        run_config_env_vars={
+            "LLAMASTACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "NUTANIX_API_KEY": (
+ "", + "Nutanix AI Endpoint API Key", + ), + }, + ) diff --git a/llama_stack/templates/nutanix/run.yaml b/llama_stack/templates/nutanix/run.yaml new file mode 100644 index 000000000..6c69f190e --- /dev/null +++ b/llama_stack/templates/nutanix/run.yaml @@ -0,0 +1,54 @@ +version: '2' +image_name: nutanix +docker_image: null +conda_env: nutanix +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: nutanix + provider_type: remote::nutanix + config: + url: https://ai.nutanix.com/api/v1 + api_key: ${env.NUTANIX_API_KEY} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nutanix}/registry.db +models: + - model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: null + provider_model_id: vllm-llama-3-1 +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: []