docs: add documentation on how to use custom run yaml in docker (#3949)

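As the title says: this documents how to mount a custom run.yaml into the Docker container and select it with `RUN_CONFIG_PATH`.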

test plan:

```yaml
# custom-ollama-run.yaml
# Run configuration for the `starter` image in which every registered model is
# served by one Ollama provider.
version: 2
image_name: starter
external_providers_dir: /.llama/providers.d

# APIs exposed by this stack; each has a matching entry under `providers`.
apis:
  - inference
  - vector_io
  - files
  - safety
  - tool_runtime
  - agents

providers:
  inference:
    # The only inference provider; all models below point at it.
    # The URL is templated from OLLAMA_URL, with http://localhost:11434 given
    # as the default in the `:=` expression.
    - provider_id: ollama
      provider_type: remote::ollama
      config:
        url: ${env.OLLAMA_URL:=http://localhost:11434}

  vector_io:
    # Inline FAISS provider persisted through the `kv_default` backend.
    - provider_id: faiss
      provider_type: inline::faiss
      config:
        persistence:
          namespace: vector_io::faiss
          backend: kv_default

  files:
    # Files are stored under /.llama/files; their metadata uses `sql_default`.
    - provider_id: meta-reference-files
      provider_type: inline::localfs
      config:
        storage_dir: /.llama/files
        metadata_store:
          table_name: files_metadata
          backend: sql_default

  safety:
    - provider_id: llama-guard
      provider_type: inline::llama-guard
      config:
        excluded_categories: []

  tool_runtime:
    - provider_id: rag-runtime
      provider_type: inline::rag-runtime

  agents:
    # Agent state uses the key-value backend; responses use the SQL backend.
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        persistence:
          agent_state:
            namespace: agents
            backend: kv_default
          responses:
            table_name: responses
            backend: sql_default
            max_write_queue_size: 10000
            num_writers: 4

storage:
  # Named backends referenced by the `backend:` keys throughout this file.
  backends:
    kv_default:
      type: kv_sqlite
      db_path: /.llama/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: /.llama/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      table_name: openai_conversations
      backend: sql_default

registered_resources:
  models:
    # LLMs, both served by the `ollama` provider.
    - model_id: llama3.2-vision:latest
      provider_id: ollama
      provider_model_id: llama3.2-vision:latest
      model_type: llm
    - model_id: llama3.2:3b
      provider_id: ollama
      provider_model_id: llama3.2:3b
      model_type: llm
    # Embedding model, also served by `ollama`.
    - model_id: nomic-embed-text-v2-moe
      provider_id: ollama
      provider_model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
      model_type: embedding
      metadata:
        embedding_dimension: 768
  shields: []
  vector_dbs: []
  datasets: []
  scoring_fns: []
  benchmarks: []
  tool_groups: []

server:
  port: 8321

telemetry:
  enabled: true

# Defaults for vector stores: the FAISS provider plus the Ollama-hosted
# embedding model, referenced here by its provider-side model id.
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: ollama
    model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
```

```bash
docker run \
     -it \
     --pull always \
     -p "$LLAMA_STACK_PORT:$LLAMA_STACK_PORT" \
     -v ~/.llama:/root/.llama \
     -v "$CUSTOM_RUN_CONFIG":/app/custom-run.yaml \
     -e RUN_CONFIG_PATH=/app/custom-run.yaml \
     -e OLLAMA_URL=http://host.docker.internal:11434/ \
     llamastack/distribution-starter:0.3.0 \
     --port "$LLAMA_STACK_PORT"
```
This commit is contained in:
raghotham 2025-10-28 16:05:44 -07:00 committed by GitHub
parent f88416ef87
commit feabcdd67b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 148 additions and 4 deletions

View file

@ -79,6 +79,33 @@ docker run \
--port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
# Docker's GPU flag is `--gpus` (plural); `--gpus all` requests all host GPUs.
# Expansions are quoted so a config path containing spaces still mounts correctly.
docker run \
-it \
--pull always \
--gpus all \
-p "$LLAMA_STACK_PORT:$LLAMA_STACK_PORT" \
-v ~/.llama:/root/.llama \
-v "$CUSTOM_RUN_CONFIG":/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
llamastack/distribution-meta-reference-gpu \
--port "$LLAMA_STACK_PORT"
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
### Via venv
Make sure you have the Llama Stack CLI available.

View file

@ -127,13 +127,39 @@ docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
-v ~/.llama:/root/.llama \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-nvidia \
--config /root/my-run.yaml \
--port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
# Expansions are quoted so a config path or API key containing spaces or glob
# characters is passed to docker as a single argument.
docker run \
-it \
--pull always \
-p "$LLAMA_STACK_PORT:$LLAMA_STACK_PORT" \
-v ~/.llama:/root/.llama \
-v "$CUSTOM_RUN_CONFIG":/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
-e NVIDIA_API_KEY="$NVIDIA_API_KEY" \
llamastack/distribution-nvidia \
--port "$LLAMA_STACK_PORT"
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
### Via venv
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.