mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00

chore: rename run.yaml to config.yaml

Since we only have one config, let's call it config.yaml! This should be treated as the source of truth for starting a stack. Change all file names, tests, etc.

Signed-off-by: Charlie Doern <cdoern@redhat.com>

parent 4a3f9151e3
commit 0cd98c957e

64 changed files with 147 additions and 145 deletions
.github/workflows/README.md (vendored): 2 changes

@@ -4,7 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl

 | Name | File | Purpose |
 | ---- | ---- | ------- |
-| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
+| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for config.yaml configs |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
 | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
.github/workflows/backward-compat.yml (vendored): 52 changes

@@ -1,6 +1,6 @@
 name: Backward Compatibility Check

-run-name: Check backward compatibility for run.yaml configs
+run-name: Check backward compatibility for config.yaml configs

 on:
 pull_request:

@@ -12,7 +12,7 @@ on:
 paths:
 - 'src/llama_stack/core/datatypes.py'
 - 'src/llama_stack/providers/datatypes.py'
-- 'src/llama_stack/distributions/**/run.yaml'
+- 'src/llama_stack/distributions/**/config.yaml'
 - 'tests/backward_compat/**'
 - '.github/workflows/backward-compat.yml'

@@ -45,15 +45,15 @@ jobs:
 run: |
 uv sync --group dev

-- name: Extract run.yaml files from main branch
+- name: Extract config.yaml files from main branch
 id: extract_configs
 run: |
-# Get list of run.yaml paths from main
+# Get list of config.yaml paths from main
 git fetch origin main
-CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
+CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/config.yaml$" || true)

 if [ -z "$CONFIG_PATHS" ]; then
-echo "No run.yaml files found in main branch"
+echo "No config.yaml files found in main branch"
 exit 1
 fi

@@ -125,7 +125,7 @@
 echo ""
 echo "⚠️ WARNING: Breaking changes detected but acknowledged"
 echo ""
-echo "This PR introduces backward-incompatible changes to run.yaml."
+echo "This PR introduces backward-incompatible changes to config.yaml."
 echo "The changes have been properly acknowledged."
 echo ""
 exit 0 # Pass the check

@@ -133,7 +133,7 @@
 echo ""
 echo "❌ ERROR: Breaking changes detected without acknowledgment"
 echo ""
-echo "This PR introduces backward-incompatible changes to run.yaml"
+echo "This PR introduces backward-incompatible changes to config.yaml"
 echo "that will break existing user configurations."
 echo ""
 echo "To acknowledge this breaking change, do ONE of:"

@@ -155,11 +155,11 @@
 with:
 fetch-depth: 0

-- name: Extract ci-tests run.yaml from main
+- name: Extract ci-tests config.yaml from main
 run: |
 git fetch origin main
-git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
+git show origin/main:src/llama_stack/distributions/ci-tests/config.yaml > /tmp/main-ci-tests-config.yaml
-echo "Extracted ci-tests run.yaml from main branch"
+echo "Extracted ci-tests config.yaml from main branch"

 - name: Setup test environment
 uses: ./.github/actions/setup-test-environment

@@ -175,7 +175,7 @@
 continue-on-error: true
 uses: ./.github/actions/run-and-record-tests
 with:
-stack-config: /tmp/main-ci-tests-run.yaml
+stack-config: /tmp/main-ci-tests-config.yaml
 setup: 'ollama'
 inference-mode: 'replay'
 suite: 'base'

@@ -258,21 +258,21 @@ jobs:
 env:
 GH_TOKEN: ${{ github.token }}

-- name: Extract ci-tests run.yaml from release
+- name: Extract ci-tests config.yaml from release
 if: steps.get_release.outputs.has_release == 'true'
 id: extract_config
 run: |
 RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

 # Try with src/ prefix first (newer releases), then without (older releases)
-if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
+if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then
-echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
+echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (src/ path)"
 echo "has_config=true" >> $GITHUB_OUTPUT
-elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
+elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then
-echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
+echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (old path)"
 echo "has_config=true" >> $GITHUB_OUTPUT
 else
-echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
+echo "::warning::ci-tests/config.yaml not found in release $RELEASE_TAG"
 echo "has_config=false" >> $GITHUB_OUTPUT
 fi

@@ -292,7 +292,7 @@
 continue-on-error: true
 uses: ./.github/actions/run-and-record-tests
 with:
-stack-config: /tmp/release-ci-tests-run.yaml
+stack-config: /tmp/release-ci-tests-config.yaml
 setup: 'ollama'
 inference-mode: 'replay'
 suite: 'base'

@@ -318,7 +318,7 @@
 continue-on-error: true
 uses: ./.github/actions/run-and-record-tests
 with:
-stack-config: /tmp/release-ci-tests-run.yaml
+stack-config: /tmp/release-ci-tests-config.yaml
 setup: 'ollama'
 inference-mode: 'replay'
 suite: 'base'

@@ -447,11 +447,11 @@ jobs:
 run: |
 RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

-# Get run.yaml files from the release (try both src/ and old path)
+# Get config.yaml files from the release (try both src/ and old path)
-CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
+CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/config.yaml$" || true)

 if [ -z "$CONFIG_PATHS" ]; then
-echo "::warning::No run.yaml files found in release $RELEASE_TAG"
+echo "::warning::No config.yaml files found in release $RELEASE_TAG"
 echo "has_configs=false" >> $GITHUB_OUTPUT
 exit 0
 fi

@@ -523,7 +523,7 @@ jobs:
 ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.

 - Users on release \`$RELEASE_TAG\` will not be able to upgrade
-- Existing run.yaml configurations will fail validation
+- Existing config.yaml configurations will fail validation

 The tests pass on \`main\` but fail with this PR's changes.

@@ -543,7 +543,7 @@ jobs:
 - Tests **PASS** on main branch ✅
 - Tests **FAIL** on PR branch ❌
 - Users on release \`$RELEASE_TAG\` will not be able to upgrade
-- Existing run.yaml configurations will fail validation
+- Existing config.yaml configurations will fail validation

 > **Note:** This is informational only and does not block merge.
 > Consider whether this breaking change is acceptable for users.

@@ -570,7 +570,7 @@ jobs:
 cat >> $GITHUB_STEP_SUMMARY <<EOF
 ## ✅ Release Schema Compatibility Passed

-All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
+All config.yaml configs from release \`$RELEASE_TAG\` are compatible.
 This PR maintains backward compatibility with the latest release.
 EOF
 fi
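The extraction step above gathers every distribution config from main before validating it against the PR's schema changes. A standalone sketch of that flow, runnable from a clone of the repository (the per-file loop and the /tmp destination naming are assumptions for illustration, not part of the workflow):

```bash
# Sketch: mirror the workflow's "extract config.yaml files from main" step locally.
git fetch origin main
CONFIG_PATHS=$(git ls-tree -r --name-only origin/main \
  | grep "src/llama_stack/distributions/.*/config.yaml$" || true)

if [ -z "$CONFIG_PATHS" ]; then
  echo "No config.yaml files found in main branch"
  exit 1
fi

# Illustrative loop (assumption): materialize each config for later validation.
for path in $CONFIG_PATHS; do
  distro=$(basename "$(dirname "$path")")
  git show "origin/main:$path" > "/tmp/${distro}-config.yaml"
done
```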
.github/workflows/integration-auth-tests.yml (vendored): 18 changes

@@ -72,7 +72,7 @@ jobs:
 if: ${{ matrix.auth-provider == 'oauth2_token' }}
 run: |
 run_dir=$(mktemp -d)
-cat <<'EOF' > $run_dir/run.yaml
+cat <<'EOF' > $run_dir/config.yaml
 version: '2'
 image_name: kube
 apis: []

@@ -101,17 +101,17 @@ jobs:
 server:
 port: 8321
 EOF
-yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/config.yaml
-yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/config.yaml
-yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/config.yaml
-yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/config.yaml
-yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/config.yaml
-yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml
+yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/config.yaml
-cat $run_dir/run.yaml
+cat $run_dir/config.yaml

 # avoid line breaks in the server log, especially because we grep it below.
 export LLAMA_STACK_LOG_WIDTH=200
-nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &
+nohup uv run llama stack run $run_dir/config.yaml > server.log 2>&1 &

 - name: Wait for Llama Stack server to be ready
 run: |
.github/workflows/providers-build.yml (vendored): 4 changes

@@ -116,7 +116,7 @@ jobs:
 BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
 BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
 BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
-BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml"
 if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
 BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
 fi

@@ -162,7 +162,7 @@ jobs:
 BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
 BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
 BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
-BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml"
 if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
 BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
 fi
.github/workflows/providers-list-deps.yml (vendored): 2 changes

@@ -102,4 +102,4 @@ jobs:
 USE_COPY_NOT_MOUNT: "true"
 LLAMA_STACK_DIR: "."
 run: |
-uv run llama stack list-deps src/llama_stack/distributions/ci-tests/run.yaml
+uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml
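For local use outside CI, the renamed path drops into the same command; a sketch combining this step with the `xargs -L1 uv pip install` pipe that appears later in this commit's distribution docs (running it locally like this is an assumption):

```bash
# Sketch: resolve the ci-tests distribution's dependencies from its config.yaml
# and install them into the active environment.
uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml \
  | xargs -L1 uv pip install
```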
@@ -43,8 +43,8 @@ jobs:
 nohup ramalama serve llama3.2:3b-instruct-fp16 > ramalama_server.log 2>&1 &
 - name: Apply image type to config file
 run: |
-yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
+yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/config.yaml
-cat tests/external/ramalama-stack/run.yaml
+cat tests/external/ramalama-stack/config.yaml

 - name: Install distribution dependencies
 run: |

@@ -59,7 +59,7 @@
 # Use the virtual environment created by the build step (name comes from build config)
 source ramalama-stack-test/bin/activate
 uv pip list
-nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &
+nohup llama stack run tests/external/ramalama-stack/config.yaml > server.log 2>&1 &

 - name: Wait for Llama Stack server to be ready
 run: |
@@ -85,7 +85,7 @@ Published on: 2025-07-28T23:35:23Z
 ## Highlights

 * Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc.
-* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details.
+* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [config.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/config.yaml) for more details.
 * All tests migrated to pytest now (thanks @Elbehery)
 * DPO implementation in the post-training provider (thanks @Nehanth)
 * (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back.
@@ -96,7 +96,7 @@ We have built-in functionality to run the supported open-benchmarks using llama-

 Spin up llama stack server with 'open-benchmark' template
 ```
-llama stack run llama_stack/distributions/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/config.yaml

 ```
@@ -85,7 +85,7 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:

 ## Configuring Default Embedding Models

-To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:
+To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your config.yaml like so:

 ```yaml
 vector_stores:
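The diffed docs cut off right after the `vector_stores:` key; a rough sketch of how such a default-embedding-model block might be filled in (the nested key names and the model/provider values here are assumptions for illustration, not taken from this commit):

```yaml
# Illustrative sketch only; verify key names against your distribution's config.yaml.
vector_stores:
  default_provider_id: faiss                  # assumed key and value
  default_embedding_model:                    # assumed key
    provider_id: sentence-transformers        # placeholder provider
    model_id: nomic-ai/nomic-embed-text-v1.5  # placeholder model
```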
|
|
@ -85,7 +85,7 @@ Features:
|
||||||
- Context retrieval with token limits
|
- Context retrieval with token limits
|
||||||
|
|
||||||
:::note[Default Configuration]
|
:::note[Default Configuration]
|
||||||
By default, llama stack run.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers.
|
By default, llama stack config.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
## Model Context Protocol (MCP)
|
## Model Context Protocol (MCP)
|
||||||
|
|
|
||||||
|
|
@@ -47,7 +47,7 @@ We have built-in functionality to run the supported open-benckmarks using llama-

 Spin up llama stack server with 'open-benchmark' template
 ```bash
-llama stack run llama_stack/distributions/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/config.yaml
 ```

 #### Run eval CLI
@@ -14,7 +14,7 @@ This guide will walk you through the process of adding a new API provider to Lla
 - Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
 - Determine the provider type ([Remote](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote) or [Inline](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline)). Remote providers make requests to external services, while inline providers execute implementation locally.
 - Add your provider to the appropriate [Registry](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/registry/). Specify pip dependencies necessary.
-- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `run.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
+- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.

 Here are some example PRs to help you get started:
@@ -133,7 +133,7 @@ For more information about the operator, see the [llama-stack-k8s-operator repos
 ### Step 4: Deploy Llama Stack Server using Operator

 Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
-You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).
+You can optionally override the default `config.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).

 ```yaml
 cat <<EOF | kubectl apply -f -

@@ -155,7 +155,7 @@ spec:
 value: "4096"
 - name: VLLM_API_TOKEN
 value: "fake"
-# Optional: override run.yaml from a ConfigMap using userConfig
+# Optional: override config.yaml from a ConfigMap using userConfig
 userConfig:
 configMap:
 name: llama-stack-config

@@ -172,7 +172,7 @@ EOF
 - `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
 - `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
 - `server.containerSpec.env`: Environment variables to configure providers:
-- `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
+- `server.userConfig`: (Optional) Override the default `config.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
 - `server.storage.size`: Size of the persistent volume for model and data storage
 - `server.storage.mountPath`: Where to mount the storage in the container
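The custom resource above references a ConfigMap named `llama-stack-config`; a sketch of creating it from a local config.yaml before applying the resource (the `--from-file` key name is an assumption about what the operator expects, not something stated in this commit):

```bash
# Sketch: publish a customized config.yaml as the ConfigMap referenced by
# spec.server.userConfig in the LlamaStackDistribution above.
kubectl create configmap llama-stack-config \
  --from-file=config.yaml=./config.yaml   # key name is an assumption
```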
|
|
@ -12,7 +12,7 @@ This guide walks you through inspecting existing distributions, customising thei
|
||||||
All first-party distributions live under `llama_stack/distributions/`. Each directory contains:
|
All first-party distributions live under `llama_stack/distributions/`. Each directory contains:
|
||||||
|
|
||||||
- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
|
- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
|
||||||
- `run.yaml` – sample run configuration (when provided).
|
- `config.yaml` – sample run configuration (when provided).
|
||||||
- Documentation fragments that power this site.
|
- Documentation fragments that power this site.
|
||||||
|
|
||||||
Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
|
Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
|
||||||
|
|
@ -35,7 +35,7 @@ docker build . \
|
||||||
Handy build arguments:
|
Handy build arguments:
|
||||||
|
|
||||||
- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
|
- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
|
||||||
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
|
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/config.yaml`).
|
||||||
- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
|
- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
|
||||||
- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
|
- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
|
||||||
- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
|
- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
|
||||||
|
|
@ -50,7 +50,7 @@ External providers live outside the main repository but can be bundled by pointi
|
||||||
|
|
||||||
1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
|
1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
|
||||||
2. Update `build.yaml` with the directory and provider entries.
|
2. Update `build.yaml` with the directory and provider entries.
|
||||||
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
|
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/config.yaml` if you want to bake the config.
|
||||||
|
|
||||||
Example `build.yaml` excerpt for a custom Ollama provider:
|
Example `build.yaml` excerpt for a custom Ollama provider:
|
||||||
|
|
||||||
|
|
@ -142,7 +142,7 @@ If you prepared a custom run config, mount it into the container and reference i
|
||||||
```bash
|
```bash
|
||||||
docker run \
|
docker run \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v $(pwd)/run.yaml:/app/run.yaml \
|
-v $(pwd)/config.yaml:/app/config.yaml \
|
||||||
llama-stack:starter \
|
llama-stack:starter \
|
||||||
/app/run.yaml
|
/app/config.yaml
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@@ -9,7 +9,7 @@ sidebar_position: 6
 The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:

 ```{note}
-The default `run.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
+The default `config.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your config.yaml Configuration](customizing_run_yaml.md).
 ```

 ```{dropdown} 👋 Click here for a Sample Configuration File
||||||
# Set environment variables in your shell
|
# Set environment variables in your shell
|
||||||
export API_KEY=sk-123
|
export API_KEY=sk-123
|
||||||
export BASE_URL=https://custom-api.com
|
export BASE_URL=https://custom-api.com
|
||||||
llama stack run --config run.yaml
|
llama stack run --config config.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Type Safety
|
#### Type Safety
|
||||||
|
|
|
||||||
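The shell exports above only take effect where the config references them; a rough sketch of what that reference could look like inside config.yaml (the provider entry, its type, and the field names are assumptions for illustration, while `${env.VAR}` is the substitution syntax Llama Stack configs use):

```yaml
# Sketch only: a hypothetical provider entry that picks up the exported variables.
providers:
  inference:
    - provider_id: custom-api          # illustrative name
      provider_type: remote::openai    # provider type chosen for illustration
      config:
        api_key: ${env.API_KEY}
        base_url: ${env.BASE_URL}
```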
|
|
@ -1,16 +1,16 @@
|
||||||
---
|
---
|
||||||
title: Customizing run.yaml
|
title: Customizing config.yaml
|
||||||
description: Customizing run.yaml files for Llama Stack templates
|
description: Customizing config.yaml files for Llama Stack templates
|
||||||
sidebar_label: Customizing run.yaml
|
sidebar_label: Customizing config.yaml
|
||||||
sidebar_position: 4
|
sidebar_position: 4
|
||||||
---
|
---
|
||||||
# Customizing run.yaml Files
|
# Customizing config.yaml Files
|
||||||
|
|
||||||
The `run.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.
|
The `config.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.
|
||||||
|
|
||||||
## Key Points
|
## Key Points
|
||||||
|
|
||||||
- **Templates are starting points**: Generated `run.yaml` files contain defaults for development/testing
|
- **Templates are starting points**: Generated `config.yaml` files contain defaults for development/testing
|
||||||
- **Customization expected**: Update URLs, credentials, models, and settings for your environment
|
- **Customization expected**: Update URLs, credentials, models, and settings for your environment
|
||||||
- **Version control separately**: Keep customized configs in your own repository
|
- **Version control separately**: Keep customized configs in your own repository
|
||||||
- **Environment-specific**: Create different configurations for dev, staging, production
|
- **Environment-specific**: Create different configurations for dev, staging, production
|
||||||
|
|
@ -29,7 +29,7 @@ You can customize:
|
||||||
## Best Practices
|
## Best Practices
|
||||||
|
|
||||||
- Use environment variables for secrets and environment-specific values
|
- Use environment variables for secrets and environment-specific values
|
||||||
- Create separate `run.yaml` files for different environments (dev, staging, prod)
|
- Create separate `config.yaml` files for different environments (dev, staging, prod)
|
||||||
- Document your changes with comments
|
- Document your changes with comments
|
||||||
- Test configurations before deployment
|
- Test configurations before deployment
|
||||||
- Keep your customized configs in version control
|
- Keep your customized configs in version control
|
||||||
|
|
@ -38,8 +38,8 @@ Example structure:
|
||||||
```
|
```
|
||||||
your-project/
|
your-project/
|
||||||
├── configs/
|
├── configs/
|
||||||
│ ├── dev-run.yaml
|
│ ├── dev-config.yaml
|
||||||
│ ├── prod-run.yaml
|
│ ├── prod-config.yaml
|
||||||
└── README.md
|
└── README.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
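A short sketch of how the environment-specific files from the example structure above would be used at start time (the commands mirror the `llama stack run` usage shown throughout this commit):

```bash
# Sketch: start the stack against the environment-specific configs laid out above.
llama stack run configs/dev-config.yaml    # development
llama stack run configs/prod-config.yaml   # production
```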
@@ -33,7 +33,7 @@ Then, you can access the APIs like `models` and `inference` on the client and ca
 response = client.models.list()
 ```

-If you've created a [custom distribution](./building_distro), you can also use the run.yaml configuration file directly:
+If you've created a [custom distribution](./building_distro), you can also use the config.yaml configuration file directly:

 ```python
 client = LlamaStackAsLibraryClient(config_path)
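Putting the two snippets from this page together, a minimal library-client sketch after the rename (the import path, the explicit `initialize()` call, and the example config location are assumptions, not shown in this diff):

```python
# Sketch: use a distribution's config.yaml directly with the library client.
from llama_stack.core.library_client import LlamaStackAsLibraryClient  # import path assumed

config_path = "src/llama_stack/distributions/starter/config.yaml"  # assumed location
client = LlamaStackAsLibraryClient(config_path)
client.initialize()  # explicit initialization; some releases do this lazily

response = client.models.list()  # same call shown earlier on this page
print(response)
```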
@@ -15,7 +15,7 @@ This section provides an overview of the distributions available in Llama Stack.

 - **[Available Distributions](./list_of_distributions.mdx)** - Complete list and comparison of all distributions
 - **[Building Custom Distributions](./building_distro.mdx)** - Create your own distribution from scratch
-- **[Customizing Configuration](./customizing_run_yaml.mdx)** - Customize run.yaml for your needs
+- **[Customizing Configuration](./customizing_run_yaml.mdx)** - Customize config.yaml for your needs
 - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions
 - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code
 - **[Configuration Reference](./configuration.mdx)** - Configuration file format details
@@ -67,11 +67,11 @@ LLAMA_STACK_PORT=5001
 docker run \
 -it \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
--v ./run.yaml:/root/my-run.yaml \
+-v ./config.yaml:/root/my-config.yaml \
 -e WATSONX_API_KEY=$WATSONX_API_KEY \
 -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
 -e WATSONX_BASE_URL=$WATSONX_BASE_URL \
 llamastack/distribution-watsonx \
---config /root/my-run.yaml \
+--config /root/my-config.yaml \
 --port $LLAMA_STACK_PORT
 ```
@@ -29,7 +29,7 @@ The only difference vs. the `tgi` distribution is that it runs the Dell-TGI serv
 ```
 $ cd distributions/dell-tgi/
 $ ls
-compose.yaml README.md run.yaml
+compose.yaml README.md config.yaml
 $ docker compose up
 ```
@@ -65,10 +65,10 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 #### Start Llama Stack server pointing to TGI server

 ```
-docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./config.yaml:/root/my-config.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-config.yaml
 ```

-Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
+Make sure in you `config.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
 ```
 inference:
 - provider_id: tgi0
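The snippet above is cut off after `provider_id: tgi0`; a rough sketch of a complete entry pointing at a TGI endpoint (the `provider_type` and `config` field names are assumptions based on typical Llama Stack provider entries, and the URL is a placeholder):

```yaml
# Illustrative sketch only; verify field names against your distribution's config.yaml.
inference:
  - provider_id: tgi0
    provider_type: remote::tgi
    config:
      url: http://127.0.0.1:5009   # point at your TGI server endpoint
```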
|
|
@ -152,14 +152,14 @@ docker run \
|
||||||
--pull always \
|
--pull always \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v $HOME/.llama:/root/.llama \
|
-v $HOME/.llama:/root/.llama \
|
||||||
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \
|
||||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
-e DEH_URL=$DEH_URL \
|
-e DEH_URL=$DEH_URL \
|
||||||
-e SAFETY_MODEL=$SAFETY_MODEL \
|
-e SAFETY_MODEL=$SAFETY_MODEL \
|
||||||
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||||
-e CHROMA_URL=$CHROMA_URL \
|
-e CHROMA_URL=$CHROMA_URL \
|
||||||
llamastack/distribution-dell \
|
llamastack/distribution-dell \
|
||||||
--config /root/my-run.yaml \
|
--config /root/my-config.yaml \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -84,8 +84,8 @@ docker run \
 You can also run the Docker container with a custom run configuration file by mounting it into the container:

 ```bash
-# Set the path to your custom run.yaml file
+# Set the path to your custom config.yaml file
-CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
 LLAMA_STACK_PORT=8321

 docker run \

@@ -94,8 +94,8 @@ docker run \
 --gpu all \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
--v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+-v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
--e RUN_CONFIG_PATH=/app/custom-run.yaml \
+-e RUN_CONFIG_PATH=/app/custom-config.yaml \
 llamastack/distribution-meta-reference-gpu \
 --port $LLAMA_STACK_PORT
 ```

@@ -103,7 +103,7 @@ docker run \
 **Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.

 Available run configurations for this distribution:
-- `run.yaml`
+- `config.yaml`
 - `run-with-safety.yaml`

 ### Via venv

@@ -113,7 +113,7 @@ Make sure you have the Llama Stack CLI available.
 ```bash
 llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
 INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-llama stack run distributions/meta-reference-gpu/run.yaml \
+llama stack run distributions/meta-reference-gpu/config.yaml \
 --port 8321
 ```
@@ -138,8 +138,8 @@ docker run \
 You can also run the Docker container with a custom run configuration file by mounting it into the container:

 ```bash
-# Set the path to your custom run.yaml file
+# Set the path to your custom config.yaml file
-CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
 LLAMA_STACK_PORT=8321

 docker run \

@@ -147,8 +147,8 @@ docker run \
 --pull always \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
--v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+-v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
--e RUN_CONFIG_PATH=/app/custom-run.yaml \
+-e RUN_CONFIG_PATH=/app/custom-config.yaml \
 -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
 llamastack/distribution-nvidia \
 --port $LLAMA_STACK_PORT

@@ -157,7 +157,7 @@ docker run \
 **Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.

 Available run configurations for this distribution:
-- `run.yaml`
+- `config.yaml`
 - `run-with-safety.yaml`

 ### Via venv

@@ -169,7 +169,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack list-deps nvidia | xargs -L1 uv pip install
 NVIDIA_API_KEY=$NVIDIA_API_KEY \
 INFERENCE_MODEL=$INFERENCE_MODEL \
-llama stack run ./run.yaml \
+llama stack run ./config.yaml \
 --port 8321
 ```
@@ -98,7 +98,7 @@ Note to start the container with Podman, you can do the same but replace `docker
 `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`
 with `host.containers.internal`.

-The configuration YAML for the Ollama distribution is available at `distributions/ollama/run.yaml`.
+The configuration YAML for the Ollama distribution is available at `distributions/ollama/config.yaml`.

 :::tip
 Docker containers run in their own isolated network namespaces on Linux. To allow the container to communicate with services running on the host via `localhost`, you need `--network=host`. This makes the container use the host's network directly so it can connect to Ollama running on `localhost:11434`.
@@ -222,7 +222,7 @@ def get_provider_spec() -> ProviderSpec:

 [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.

-To install Llama Stack with this external provider a user can provider the following run.yaml:
+To install Llama Stack with this external provider a user can provider the following config.yaml:

 ```yaml
 version: 2
@@ -51,7 +51,7 @@ results = await client.vector_stores.search(

 > **Note**: For detailed configuration examples and options, see [Configuration Examples](../openai_file_operations_support.md#configuration-examples) in the full documentation.

-**Basic Setup**: Configure vector_io and files providers in your run.yaml
+**Basic Setup**: Configure vector_io and files providers in your config.yaml

 ## Common Use Cases
@@ -123,7 +123,7 @@ Connectors are MCP servers maintained and managed by the Responses API provider.

 **Open Questions:**
 - Should Llama Stack include built-in support for some, all, or none of OpenAI's connectors?
-- Should there be a mechanism for administrators to add custom connectors via `run.yaml` or an API?
+- Should there be a mechanism for administrators to add custom connectors via `config.yaml` or an API?

 ---
@@ -210,7 +210,7 @@ Metadata allows you to attach additional information to a response for your own

 **Status:** Feature Request

-When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `run.yaml` or an administrative API.
+When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `config.yaml` or an administrative API.

 ---
@@ -355,7 +355,7 @@ The purpose of scoring function is to calculate the score for each example based
 Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.

 ### Add new benchmark into template
-Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml)
+Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/config.yaml)

 Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
 - `benchmark_id`: identifier of the benchmark

@@ -366,7 +366,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar

 Spin up llama stack server with 'open-benchmark' templates
 ```bash
-llama stack run llama_stack/distributions/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/config.yaml
 ```

 Run eval benchmark CLI with your new benchmark id
@@ -64,14 +64,14 @@ class StackListBuilds(Subcommand):
 for name, (path, source_type) in sorted(distributions.items()):
 row = [name, source_type, str(path)]
 # Check for build and run config files
-# For built-in distributions, configs are named build.yaml and run.yaml
+# For built-in distributions, configs are named build.yaml and config.yaml
-# For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
+# For custom distributions, configs are named {name}-build.yaml and {name}-config.yaml
 if source_type == "built-in":
 build_config = "Yes" if (path / "build.yaml").exists() else "No"
-run_config = "Yes" if (path / "run.yaml").exists() else "No"
+run_config = "Yes" if (path / "config.yaml").exists() else "No"
 else:
 build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
-run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
+run_config = "Yes" if (path / f"{name}-config.yaml").exists() else "No"
 row.extend([build_config, run_config])
 rows.append(row)
 print_table(rows, headers, separate_rows=True)
@@ -57,7 +57,7 @@ def generate_run_config(
     image_name: str,
 ) -> Path:
     """
-    Generate a run.yaml template file for user to edit from a build.yaml file
+    Generate a config.yaml template file for user to edit from a build.yaml file
     """
     apis = list(build_config.distribution_spec.providers.keys())
     distro_dir = DISTRIBS_BASE_DIR / image_name

@@ -123,7 +123,7 @@ def generate_run_config(
             )
             run_config.providers[api].append(p_spec)

-    run_config_file = build_dir / f"{image_name}-run.yaml"
+    run_config_file = build_dir / f"{image_name}-config.yaml"

    with open(run_config_file, "w") as f:
        to_write = json.loads(run_config.model_dump_json())

@@ -131,7 +131,7 @@ def generate_run_config(

     # Only print this message for non-container builds since it will be displayed before the
     # container is built
-    # For non-container builds, the run.yaml is generated at the very end of the build process so it
+    # For non-container builds, the config.yaml is generated at the very end of the build process so it
     # makes sense to display this message
     if build_config.image_type != LlamaStackImageType.CONTAINER.value:
         cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)

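For context, the hunks above write the generated file as `<image_name>-config.yaml` under the distributions directory and then point the user at `llama stack run`. A minimal sketch of that flow, assuming the default `~/.llama/distributions` location for `DISTRIBS_BASE_DIR` and an image name of `my-stack` (both illustrative; build flags depend on your version of the CLI):

```bash
# Build a stack; for non-container builds a <image_name>-config.yaml is written
# at the end of the build and a `llama stack run` hint is printed.
llama stack build   # follow the interactive prompts, or pass your usual build flags

# Start the stack from the generated config (illustrative path; use the one printed by the build)
llama stack run ~/.llama/distributions/my-stack/my-stack-config.yaml
```
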
@@ -9,8 +9,8 @@ from typing import Any
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
+    StackConfig,
 )
-from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack_api import Api, RoutingTable

@@ -224,7 +224,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
         existing_models = await self.get_all_with_type("model")

         # we may have an alias for the model registered by the user (or during initialization
-        # from run.yaml) that we need to keep track of
+        # from config.yaml) that we need to keep track of
         model_ids = {}
         for model in existing_models:
             if model.provider_id != provider_id:

@@ -500,7 +500,7 @@ async def refresh_registry_task(impls: dict[Api, Any]):


 def get_stack_run_config_from_distro(distro: str) -> StackConfig:
-    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml"
+    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"

     with importlib.resources.as_file(distro_path) as path:
         if not path.exists():

@@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
                     "vector_io": [chromadb_provider],

@@ -141,14 +141,14 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \
   -e INFERENCE_MODEL=$INFERENCE_MODEL \
   -e DEH_URL=$DEH_URL \
   -e SAFETY_MODEL=$SAFETY_MODEL \
   -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
   -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
+  --config /root/my-config.yaml \
   --port $LLAMA_STACK_PORT
 ```

@@ -157,16 +157,16 @@ docker run \
 You can also run the Docker container with a custom run configuration file by mounting it into the container:

 ```bash
-# Set the path to your custom run.yaml file
-CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+# Set the path to your custom config.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml

 docker run -it \
   --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
-  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
   -e INFERENCE_MODEL=$INFERENCE_MODEL \
   -e DEH_URL=$DEH_URL \
   -e CHROMA_URL=$CHROMA_URL \

@@ -73,8 +73,8 @@ docker run \
 You can also run the Docker container with a custom run configuration file by mounting it into the container:

 ```bash
-# Set the path to your custom run.yaml file
-CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+# Set the path to your custom config.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
 LLAMA_STACK_PORT=8321

 docker run \

@@ -83,8 +83,8 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT
 ```

@@ -105,7 +105,7 @@ Make sure you have the Llama Stack CLI available.
 ```bash
 llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
 INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-llama stack run distributions/{{ name }}/run.yaml \
+llama stack run distributions/{{ name }}/config.yaml \
   --port 8321
 ```

@@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
                     "vector_io": [vector_io_provider],

@@ -128,8 +128,8 @@ docker run \
 You can also run the Docker container with a custom run configuration file by mounting it into the container:

 ```bash
-# Set the path to your custom run.yaml file
-CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+# Set the path to your custom config.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
 LLAMA_STACK_PORT=8321

 docker run \

@@ -137,8 +137,8 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT

@@ -162,7 +162,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack list-deps nvidia | xargs -L1 uv pip install
 NVIDIA_API_KEY=$NVIDIA_API_KEY \
 INFERENCE_MODEL=$INFERENCE_MODEL \
-llama stack run ./run.yaml \
+llama stack run ./config.yaml \
   --port 8321
 ```

@@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "datasetio": [datasetio_provider],

@@ -74,7 +74,7 @@ def get_distribution_template(name: str = "oci") -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "vector_io": [vector_io_provider],

@@ -261,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         available_models_by_provider=available_models,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": inference_providers,
                     "vector_io": vector_io_providers,

@@ -276,7 +276,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         template_path=None,
         providers=providers,
         run_configs={
-            "run.yaml": base_run_settings,
+            "config.yaml": base_run_settings,
             "run-with-postgres-store.yaml": postgres_run_settings,
         },
         run_config_env_vars={

@@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         template_path=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "files": [files_provider],

@@ -92,10 +92,10 @@ def config_to_category_levels(category: str, level: str):

 def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
     """
-    Helper function to parse a yaml logging configuration found in the run.yaml
+    Helper function to parse a yaml logging configuration found in the config.yaml

     Parameters:
-        yaml_config (Logging): the logger config object found in the run.yaml
+        yaml_config (Logging): the logger config object found in the config.yaml

     Returns:
         Dict[str, int]: A dictionary mapping categories to their log levels.

@@ -16,7 +16,7 @@ class MetaReferenceInferenceConfig(BaseModel):
     # this is a placeholder to indicate inference model id
     # the actual inference model id is dtermined by the moddel id in the request
     # Note: you need to register the model before using it for inference
-    # models in the resouce list in the run.yaml config will be registered automatically
+    # models in the resouce list in the config.yaml config will be registered automatically
     model: str | None = None
     torch_seed: int | None = None
     max_seq_len: int = 4096

@@ -51,7 +51,9 @@ class _HfAdapter(OpenAIMixin):
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
         if not config.base_url:
-            raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
+            raise ValueError(
+                "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI."
+            )
         log.info(f"Initializing TGI client with url={config.base_url}")
         # Extract base URL without /v1 for HF client initialization
         base_url_str = str(config.base_url).rstrip("/")

@@ -46,7 +46,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
     async def initialize(self) -> None:
         if not self.config.base_url:
             raise ValueError(
-                "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
+                "You must provide a URL in config.yaml (or via the VLLM_URL environment variable) to use vLLM."
             )

     async def health(self) -> HealthResponse:

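Both adapters fail fast when no base URL is configured, and both error messages point at either the provider entry in config.yaml or an environment variable. A minimal sketch of the environment-variable route, assuming locally running inference servers (the URLs, ports, and `/v1` suffix are illustrative and should match how your server is actually exposed):

```bash
# Point the remote vLLM provider at a running vLLM server (URL is illustrative)
VLLM_URL=http://localhost:8000/v1 llama stack run <path-to-config.yaml>

# Same idea for TGI
TGI_URL=http://localhost:8080/v1 llama stack run <path-to-config.yaml>
```
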
@@ -35,7 +35,7 @@ For running integration tests, you must provide a few things:
 - **`server:<config>`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
 - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:starter:8322`)
 - a URL which points to a Llama Stack distribution server
-- a distribution name (e.g., `starter`) or a path to a `run.yaml` file
+- a distribution name (e.g., `starter`) or a path to a `config.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.

 - Any API keys you need to use should be set in the environment, or can be passed in with the --env option.

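Each of the pointer forms listed above is handed to the integration test suite in the same way. A minimal sketch, assuming the pytest option is named `--stack-config` as in the upstream test harness and using an illustrative test path:

```bash
# Auto-start a server from the 'starter' distribution and run the inference tests against it
uv run pytest tests/integration/inference --stack-config=server:starter

# Or point the tests at a config file directly (path is illustrative)
uv run pytest tests/integration/inference --stack-config=/path/to/config.yaml
```
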
@@ -5,10 +5,10 @@
 # the root directory of this source tree.

 """
-Backward compatibility test for run.yaml files.
+Backward compatibility test for config.yaml files.

 This test ensures that changes to StackRunConfig don't break
-existing run.yaml files from previous versions.
+existing config.yaml files from previous versions.
 """

 import os

@@ -36,10 +36,10 @@ def get_test_configs():
     else:
         # Local mode: test current distribution configs
         repo_root = Path(__file__).parent.parent.parent
-        config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/run.yaml"))
+        config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/config.yaml"))

         if not config_files:
-            pytest.skip("No run.yaml files found in distributions/")
+            pytest.skip("No config.yaml files found in distributions/")

     return config_files

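The same compatibility check can also be exercised locally against the distribution configs in the tree. A small sketch, assuming a standard pytest invocation of the `tests/backward_compat` suite:

```bash
# Check that the shipped distribution config.yaml files still load with the current datatypes
uv run pytest tests/backward_compat -v
```
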
@@ -25,7 +25,7 @@ Here are the most important options:
 - **`server:<config>`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
 - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:starter:8322`)
 - a URL which points to a Llama Stack distribution server
-- a distribution name (e.g., `starter`) or a path to a `run.yaml` file
+- a distribution name (e.g., `starter`) or a path to a `config.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=ollama,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
 - `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers.

@@ -151,7 +151,7 @@ def pytest_addoption(parser):
         """
         a 'pointer' to the stack. this can be either be:
         (a) a template name like `starter`, or
-        (b) a path to a run.yaml file, or
+        (b) a path to a config.yaml file, or
         (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
         (d) a server config like `server:ci-tests`, or
         (e) a docker config like `docker:ci-tests` (builds and runs container)

@@ -17,7 +17,7 @@ from llama_stack.core.storage.datatypes import (

 def test_starter_distribution_config_loads_and_resolves():
     """Integration: Actual starter config should parse and have correct storage structure."""
-    with open("llama_stack/distributions/starter/run.yaml") as f:
+    with open("llama_stack/distributions/starter/config.yaml") as f:
         config_dict = yaml.safe_load(f)

     config = StackConfig(**config_dict)

@@ -47,7 +47,7 @@ def test_starter_distribution_config_loads_and_resolves():

 def test_postgres_demo_distribution_config_loads():
     """Integration: Postgres demo should use Postgres backend for all stores."""
-    with open("llama_stack/distributions/postgres-demo/run.yaml") as f:
+    with open("llama_stack/distributions/postgres-demo/config.yaml") as f:
         config_dict = yaml.safe_load(f)

     config = StackConfig(**config_dict)

@@ -302,7 +302,7 @@ def test_providers_flag_generates_config_with_api_keys():
     # Read the generated config file
     from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR

-    config_file = DISTRIBS_BASE_DIR / "providers-run" / "run.yaml"
+    config_file = DISTRIBS_BASE_DIR / "providers-run" / "config.yaml"
     with open(config_file) as f:
         config_dict = yaml.safe_load(f)

@@ -32,7 +32,7 @@ def mock_distribs_base_dir(tmp_path):
     starter_custom = custom_dir / "starter"
     starter_custom.mkdir()
     (starter_custom / "starter-build.yaml").write_text("# build config")
-    (starter_custom / "starter-run.yaml").write_text("# run config")
+    (starter_custom / "starter-config.yaml").write_text("# run config")

     return custom_dir

@@ -48,7 +48,7 @@ def mock_distro_dir(tmp_path):
     distro_path = distro_dir / distro_name
     distro_path.mkdir()
     (distro_path / "build.yaml").write_text("# build config")
-    (distro_path / "run.yaml").write_text("# run config")
+    (distro_path / "config.yaml").write_text("# run config")

     return distro_dir
