From feabcdd67ba4e169d9dec46d14c29fc3f09ad790 Mon Sep 17 00:00:00 2001
From: raghotham
Date: Tue, 28 Oct 2025 16:05:44 -0700
Subject: [PATCH] docs: add documentation on how to use custom run yaml in docker (#3949)

as title

test plan:
```yaml
# custom-ollama-run.yaml
version: 2
image_name: starter
external_providers_dir: /.llama/providers.d
apis:
- inference
- vector_io
- files
- safety
- tool_runtime
- agents
providers:
  inference:
  # Single Ollama provider for all models
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      persistence:
        namespace: vector_io::faiss
        backend: kv_default
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: /.llama/files
      metadata_store:
        table_name: files_metadata
        backend: sql_default
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  tool_runtime:
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence:
        agent_state:
          namespace: agents
          backend: kv_default
        responses:
          table_name: responses
          backend: sql_default
          max_write_queue_size: 10000
          num_writers: 4
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: /.llama/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: /.llama/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      table_name: openai_conversations
      backend: sql_default
registered_resources:
  models:
  # All models use the same 'ollama' provider
  - model_id: llama3.2-vision:latest
    provider_id: ollama
    provider_model_id: llama3.2-vision:latest
    model_type: llm
  - model_id: llama3.2:3b
    provider_id: ollama
    provider_model_id: llama3.2:3b
    model_type: llm
  # Embedding models
  - model_id: nomic-embed-text-v2-moe
    provider_id: ollama
    provider_model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
    model_type: embedding
    metadata:
      embedding_dimension: 768
  shields: []
  vector_dbs: []
  datasets: []
  scoring_fns: []
  benchmarks: []
  tool_groups: []
server:
  port: 8321
telemetry:
  enabled: true
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: ollama
    model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
```

```bash
docker run \
  -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
  -e OLLAMA_URL=http://host.docker.internal:11434/ \
  llamastack/distribution-starter:0.3.0 \
  --port $LLAMA_STACK_PORT
```
---
 .../self_hosted_distro/meta-reference-gpu.md | 27 +++++++++++++++
 .../self_hosted_distro/nvidia.md             | 30 +++++++++++++++--
 .../distributions/dell/doc_template.md       | 31 +++++++++++++++++
 .../meta-reference-gpu/doc_template.md       | 30 +++++++++++++++++
 .../distributions/nvidia/doc_template.md     | 33 +++++++++++++++++--
 src/llama_stack/distributions/template.py    |  1 +
 6 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
index b7134b3e1..9c4095e88 100644
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -79,6 +79,33 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 Make sure you have the Llama Stack CLI available.
diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md
index 4a7d99ff5..c48a7d391 100644
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@@ -127,13 +127,39 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
diff --git a/src/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md
index 4e28673e8..1530f665a 100644
--- a/src/llama_stack/distributions/dell/doc_template.md
+++ b/src/llama_stack/distributions/dell/doc_template.md
@@ -152,6 +152,37 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+
+docker run -it \
+  --pull always \
+  --network host \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $HOME/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via Conda

 Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
diff --git a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
index ec4452d81..af71d8388 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -68,6 +68,36 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via venv

 Make sure you have the Llama Stack CLI available.
diff --git a/src/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md
index 40f39e4f3..054a1e3ec 100644
--- a/src/llama_stack/distributions/nvidia/doc_template.md
+++ b/src/llama_stack/distributions/nvidia/doc_template.md
@@ -117,13 +117,42 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via venv

 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index 1dad60064..e6813806a 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -424,6 +424,7 @@ class DistributionTemplate(BaseModel):
                 providers_table=providers_table,
                 run_config_env_vars=self.run_config_env_vars,
                 default_models=default_models,
+                run_configs=list(self.run_configs.keys()),
             )

         return ""
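
As an optional sanity check for the test plan above (not part of the patch itself): once the starter container is running with the mounted `custom-run.yaml`, the models the server reports should be the ones declared under `registered_resources` in the custom config rather than the distribution defaults. The snippet below is a minimal sketch; it assumes the server exposes a model listing at `/v1/models` on `$LLAMA_STACK_PORT` and that `curl` is available on the host.

```bash
# Hypothetical verification step: poll until the server answers, then check
# that a model from custom-ollama-run.yaml (llama3.2:3b) is registered.
LLAMA_STACK_PORT=${LLAMA_STACK_PORT:-8321}

# Wait up to ~60 seconds for the stack to finish starting.
for _ in $(seq 1 30); do
  curl -sf "http://localhost:${LLAMA_STACK_PORT}/v1/models" > /dev/null && break
  sleep 2
done

# Expect the custom model IDs in the response; adjust the grep target if you
# changed the model list in your run config.
curl -s "http://localhost:${LLAMA_STACK_PORT}/v1/models" | grep -q "llama3.2:3b" \
  && echo "custom run config loaded" \
  || echo "model not found - check RUN_CONFIG_PATH and the mounted file"
```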