Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 01:48:05 +00:00)

Merge 0f4790f531 into 4237eb4aaa
This commit is contained in commit c06f681a02.

102 changed files with 971 additions and 1030 deletions
.github/workflows/README.md (vendored) — 2 changes

@@ -4,7 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table
 | Name | File | Purpose |
 | ---- | ---- | ------- |
-| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
+| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for config.yaml files |
 | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
.github/workflows/backward-compat.yml (vendored) — 52 changes

@@ -1,6 +1,6 @@
 name: Backward Compatibility Check

-run-name: Check backward compatibility for run.yaml configs
+run-name: Check backward compatibility for config.yaml files

 on:
   pull_request:
@@ -12,7 +12,7 @@ on:
     paths:
       - 'src/llama_stack/core/datatypes.py'
      - 'src/llama_stack/providers/datatypes.py'
-      - 'src/llama_stack/distributions/**/run.yaml'
+      - 'src/llama_stack/distributions/**/config.yaml'
      - 'tests/backward_compat/**'
      - '.github/workflows/backward-compat.yml'
@@ -45,15 +45,15 @@ jobs:
        run: |
          uv sync --group dev

-      - name: Extract run.yaml files from main branch
+      - name: Extract config.yaml files from main branch
        id: extract_configs
        run: |
-          # Get list of run.yaml paths from main
+          # Get list of config.yaml paths from main
          git fetch origin main
-          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
+          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/config.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
-            echo "No run.yaml files found in main branch"
+            echo "No config.yaml files found in main branch"
            exit 1
          fi
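The hunk above only collects the list of config paths; a later step (not shown in this diff) consumes `$CONFIG_PATHS`. As a rough sketch of that consumption pattern — the scratch directory and the idea of validating each file afterwards are assumptions, not part of this workflow — the extracted configs could be materialized like this:

```bash
# Hypothetical follow-up to the extraction step: copy each config.yaml that
# exists on main into a scratch directory so a later step can validate it
# against the schema code from this PR.
mkdir -p /tmp/main-configs
while IFS= read -r path; do
  [ -z "$path" ] && continue
  dest="/tmp/main-configs/$(echo "$path" | tr '/' '_')"
  git show "origin/main:$path" > "$dest"   # file contents as they exist on main
  echo "Saved $path -> $dest"
done <<< "$CONFIG_PATHS"
```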
@@ -125,7 +125,7 @@ jobs:
            echo ""
            echo "⚠️ WARNING: Breaking changes detected but acknowledged"
            echo ""
-            echo "This PR introduces backward-incompatible changes to run.yaml."
+            echo "This PR introduces backward-incompatible changes to config.yaml."
            echo "The changes have been properly acknowledged."
            echo ""
            exit 0  # Pass the check
@@ -133,7 +133,7 @@ jobs:
            echo ""
            echo "❌ ERROR: Breaking changes detected without acknowledgment"
            echo ""
-            echo "This PR introduces backward-incompatible changes to run.yaml"
+            echo "This PR introduces backward-incompatible changes to config.yaml"
            echo "that will break existing user configurations."
            echo ""
            echo "To acknowledge this breaking change, do ONE of:"
@@ -155,11 +155,11 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Extract ci-tests run.yaml from main
+      - name: Extract ci-tests config.yaml from main
        run: |
          git fetch origin main
-          git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
-          echo "Extracted ci-tests run.yaml from main branch"
+          git show origin/main:src/llama_stack/distributions/ci-tests/config.yaml > /tmp/main-ci-tests-config.yaml
+          echo "Extracted ci-tests config.yaml from main branch"

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
@@ -175,7 +175,7 @@ jobs:
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
-          stack-config: /tmp/main-ci-tests-run.yaml
+          stack-config: /tmp/main-ci-tests-config.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
@@ -258,21 +258,21 @@ jobs:
        env:
          GH_TOKEN: ${{ github.token }}

-      - name: Extract ci-tests run.yaml from release
+      - name: Extract ci-tests config.yaml from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_config
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

          # Try with src/ prefix first (newer releases), then without (older releases)
-          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
-            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
+          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then
+            echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (src/ path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
-          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
-            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
+          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then
+            echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (old path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          else
-            echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
+            echo "::warning::ci-tests/config.yaml not found in release $RELEASE_TAG"
            echo "has_config=false" >> $GITHUB_OUTPUT
          fi
@@ -292,7 +292,7 @@ jobs:
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
-          stack-config: /tmp/release-ci-tests-run.yaml
+          stack-config: /tmp/release-ci-tests-config.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
@@ -318,7 +318,7 @@ jobs:
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
-          stack-config: /tmp/release-ci-tests-run.yaml
+          stack-config: /tmp/release-ci-tests-config.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
@@ -447,11 +447,11 @@ jobs:
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

-          # Get run.yaml files from the release (try both src/ and old path)
-          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
+          # Get config.yaml files from the release (try both src/ and old path)
+          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/config.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
-            echo "::warning::No run.yaml files found in release $RELEASE_TAG"
+            echo "::warning::No config.yaml files found in release $RELEASE_TAG"
            echo "has_configs=false" >> $GITHUB_OUTPUT
            exit 0
          fi
@@ -523,7 +523,7 @@ jobs:
          ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.

          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
-          - Existing run.yaml configurations will fail validation
+          - Existing config.yaml configurations will fail validation

          The tests pass on \`main\` but fail with this PR's changes.
@@ -543,7 +543,7 @@ jobs:
          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
-          - Existing run.yaml configurations will fail validation
+          - Existing config.yaml configurations will fail validation

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
@@ -570,7 +570,7 @@ jobs:
          cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Schema Compatibility Passed

-          All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
+          All config.yaml configs from release \`$RELEASE_TAG\` are compatible.
          This PR maintains backward compatibility with the latest release.
          EOF
        fi
.github/workflows/integration-auth-tests.yml (vendored) — 18 changes

@@ -72,7 +72,7 @@ jobs:
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          run_dir=$(mktemp -d)
-          cat <<'EOF' > $run_dir/run.yaml
+          cat <<'EOF' > $run_dir/config.yaml
          version: '2'
          image_name: kube
          apis: []
@@ -101,17 +101,17 @@ jobs:
          server:
            port: 8321
          EOF
-          yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/run.yaml
-          yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/run.yaml
-          yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/run.yaml
-          yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/run.yaml
-          yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/run.yaml
-          yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml
-          cat $run_dir/run.yaml
+          yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/config.yaml
+          yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/config.yaml
+          yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/config.yaml
+          yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/config.yaml
+          yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/config.yaml
+          yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/config.yaml
+          cat $run_dir/config.yaml

          # avoid line breaks in the server log, especially because we grep it below.
          export LLAMA_STACK_LOG_WIDTH=200
-          nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &
+          nohup uv run llama stack run $run_dir/config.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
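Taken together, the heredoc plus the yq edits above assemble a config.yaml whose server block looks roughly like the sketch below. The concrete values are placeholders standing in for the KUBERNETES_* and TOKEN environment variables referenced in the workflow, not values taken from this diff:

```bash
# Rough shape of the file the oauth2_token job builds (values are placeholders).
cat <<'EOF' > config.yaml
version: '2'
image_name: kube
apis: []
server:
  port: 8321
  auth:
    provider_config:
      type: oauth2_token
      tls_cafile: /path/to/kubernetes-ca.crt      # env.KUBERNETES_CA_CERT_PATH
      issuer: https://kubernetes.default.svc      # env.KUBERNETES_ISSUER
      audience: https://kubernetes.default.svc    # env.KUBERNETES_AUDIENCE
      jwks:
        uri: https://my-cluster-api-server:6443   # env.KUBERNETES_API_SERVER_URL
        token: <service-account-token>            # env.TOKEN
EOF
```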
.github/workflows/providers-build.yml (vendored) — 10 changes

@@ -113,10 +113,10 @@ jobs:

      - name: Build container image
        run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/config.yaml)
          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
-          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml"
          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
          fi
@@ -155,14 +155,14 @@ jobs:
        run: |
          yq -i '
            .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' src/llama_stack/distributions/ci-tests/build.yaml
+          ' src/llama_stack/distributions/ci-tests/config.yaml

      - name: Build UBI9 container image
        run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/config.yaml)
          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
-          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml"
          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
          fi
.github/workflows/providers-list-deps.yml (vendored) — 2 changes

@@ -102,4 +102,4 @@ jobs:
          USE_COPY_NOT_MOUNT: "true"
          LLAMA_STACK_DIR: "."
        run: |
-          uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
+          uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml
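`list-deps` only prints the distribution's dependencies; installing them is a separate step. The external-provider workflow further down pipes the output into the package manager, and that same pattern can be used standalone — a minimal sketch:

```bash
# Print the distribution's dependencies, then install them one per line
# (the same pattern test-external.yml uses below).
uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml \
  | xargs -L1 uv pip install
```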

@@ -43,8 +43,8 @@ jobs:
          nohup ramalama serve llama3.2:3b-instruct-fp16 > ramalama_server.log 2>&1 &
      - name: Apply image type to config file
        run: |
-          yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
-          cat tests/external/ramalama-stack/run.yaml
+          yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/config.yaml
+          cat tests/external/ramalama-stack/config.yaml

      - name: Install distribution dependencies
        run: |
@@ -59,7 +59,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ramalama-stack-test/bin/activate
          uv pip list
-          nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &
+          nohup llama stack run tests/external/ramalama-stack/config.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
.github/workflows/test-external.yml (vendored) — 6 changes

@@ -44,14 +44,14 @@ jobs:

      - name: Print distro dependencies
        run: |
-          uv run --no-sync llama stack list-deps tests/external/build.yaml
+          uv run --no-sync llama stack list-deps tests/external/config.yaml

      - name: Build distro from config file
        run: |
          uv venv ci-test
          source ci-test/bin/activate
          uv pip install -e .
-          LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install
+          LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/config.yaml | xargs -L1 uv pip install

      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'
@@ -62,7 +62,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ci-test/bin/activate
          uv pip list
-          nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &
+          nohup llama stack run tests/external/config.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
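The "Wait for Llama Stack server to be ready" steps are cut off in this view; they are typically a polling loop against the server port. A minimal sketch of such a loop — the `/v1/health` path and the timeout are assumptions, not taken from this diff:

```bash
# Poll the freshly started server (port 8321 is the default used throughout
# these workflows) until it answers, or give up after roughly two minutes.
for i in $(seq 1 60); do
  if curl -sf http://localhost:8321/v1/health > /dev/null; then
    echo "Server is ready"
    break
  fi
  if [ "$i" -eq 60 ]; then
    echo "Server did not become ready in time" >&2
    cat server.log
    exit 1
  fi
  sleep 2
done
```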
CHANGELOG.md (new file) — 614 lines added

@@ -0,0 +1,614 @@
# Changelog

# v0.2.20
Published on: 2025-08-29T22:25:32Z

Here are some key changes that are coming as part of this release.

### Build and Environment

- Environment improvements: fixed env var replacement to preserve types.
- Docker stability: fixed container startup failures for Fireworks AI provider.
- Removed absolute paths in build for better portability.

### Features

- UI Enhancements: Implemented file upload and VectorDB creation/configuration directly in UI.
- Vector Store Improvements: Added keyword, vector, and hybrid search inside vector store.
- Added S3 authorization support for file providers.
- SQL Store: Added inequality support to where clause.

### Documentation

- Fixed post-training docs.
- Added Contributor Guidelines for creating Internal vs. External providers.

### Fixes

- Removed unsupported bfcl scoring function.
- Multiple reliability and configuration fixes for providers and environment handling.

### Engineering / Chores

- Cleaner internal development setup with consistent paths.
- Incremental improvements to provider integration and vector store behavior.

### New Contributors
- @omertuc made their first contribution in #3270
- @r3v5 made their first contribution in vector store hybrid search

---
# v0.2.19
Published on: 2025-08-26T22:06:55Z

## Highlights
* feat: Add CORS configuration support for server by @skamenan7 in https://github.com/llamastack/llama-stack/pull/3201
* feat(api): introduce /rerank by @ehhuang in https://github.com/llamastack/llama-stack/pull/2940
* feat: Add S3 Files Provider by @mattf in https://github.com/llamastack/llama-stack/pull/3202

---

# v0.2.18
Published on: 2025-08-20T01:09:27Z

## Highlights
* Add moderations create API
* Hybrid search in Milvus
* Numerous Responses API improvements
* Documentation updates

---

# v0.2.17
Published on: 2025-08-05T01:51:14Z

## Highlights

* feat(tests): introduce inference record/replay to increase test reliability by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2941
* fix(library_client): improve initialization error handling and prevent AttributeError by @mattf in https://github.com/meta-llama/llama-stack/pull/2944
* fix: use OLLAMA_URL to activate Ollama provider in starter by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2963
* feat(UI): adding MVP playground UI by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2828
* Standardization of errors (@nathan-weinberg)
* feat: Enable DPO training with HuggingFace inline provider by @Nehanth in https://github.com/meta-llama/llama-stack/pull/2825
* chore: rename templates to distributions by @ashwinb in https://github.com/meta-llama/llama-stack/pull/3035

---

# v0.2.16
Published on: 2025-07-28T23:35:23Z

## Highlights

* Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc.
* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [config.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/config.yaml) for more details.
* All tests migrated to pytest now (thanks @Elbehery)
* DPO implementation in the post-training provider (thanks @Nehanth)
* (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back.
* `inline::vllm` provider is gone thank you very much
* several improvements to OpenAI inference implementations and LiteLLM backend (thanks @mattf)
* Chroma now supports Vector Store API (thanks @franciscojavierarceo).
* Authorization improvements: Vector Store/File APIs now supports access control (thanks @franciscojavierarceo); Telemetry read APIs are gated according to logged-in user's roles.

---
# v0.2.15
Published on: 2025-07-16T03:30:01Z

---

# v0.2.14
Published on: 2025-07-04T16:06:48Z

## Highlights

* Support for Llama Guard 4
* Added Milvus support to vector-stores API
* Documentation and zero-to-hero updates for latest APIs

---

# v0.2.13
Published on: 2025-06-28T04:28:11Z

## Highlights
* search_mode support in OpenAI vector store API
* Security fixes

---

# v0.2.12
Published on: 2025-06-20T22:52:12Z

## Highlights
* Filter support in file search
* Support auth attributes in inference and response stores

---

# v0.2.11
Published on: 2025-06-17T20:26:26Z

## Highlights
* OpenAI-compatible vector store APIs
* Hybrid Search in Sqlite-vec
* File search tool in Responses API
* Pagination in inference and response stores
* Added `suffix` to completions API for fill-in-the-middle tasks

---

# v0.2.10.1
Published on: 2025-06-06T20:11:02Z

## Highlights
* ChromaDB provider fix

---

# v0.2.10
Published on: 2025-06-05T23:21:45Z

## Highlights

* OpenAI-compatible embeddings API
* OpenAI-compatible Files API
* Postgres support in starter distro
* Enable ingestion of precomputed embeddings
* Full multi-turn support in Responses API
* Fine-grained access control policy

---

# v0.2.9
Published on: 2025-05-30T20:01:56Z

## Highlights
* Added initial streaming support in Responses API
* UI view for Responses
* Postgres inference store support

---
# v0.2.8
Published on: 2025-05-27T21:03:47Z

# Release v0.2.8

## Highlights

* Server-side MCP with auth firewalls now works in the Stack - both for Agents and Responses
* Get chat completions APIs and UI to show chat completions
* Enable keyword search for sqlite-vec

---

# v0.2.7
Published on: 2025-05-16T20:38:10Z

## Highlights

This is a small update. But a couple highlights:

* feat: function tools in OpenAI Responses by @bbrowning in https://github.com/meta-llama/llama-stack/pull/2094, getting closer to ready. Streaming is the next missing piece.
* feat: Adding support for customizing chunk context in RAG insertion and querying by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2134
* feat: scaffolding for Llama Stack UI by @ehhuang in https://github.com/meta-llama/llama-stack/pull/2149, more to come in the coming releases.

---

# v0.2.6
Published on: 2025-05-12T18:06:52Z

---

# v0.2.5
Published on: 2025-05-04T20:16:49Z

---

# v0.2.4
Published on: 2025-04-29T17:26:01Z

## Highlights

* One-liner to install and run Llama Stack yay! by @reluctantfuturist in https://github.com/meta-llama/llama-stack/pull/1383
* support for NVIDIA NeMo datastore by @raspawar in https://github.com/meta-llama/llama-stack/pull/1852
* (yuge!) Kubernetes authentication by @leseb in https://github.com/meta-llama/llama-stack/pull/1778
* (yuge!) OpenAI Responses API by @bbrowning in https://github.com/meta-llama/llama-stack/pull/1989
* add api.llama provider, llama-guard-4 model by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2058

---

# v0.2.3
Published on: 2025-04-25T22:46:21Z

## Highlights

* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works.
* significant improvements and functionality added to the nVIDIA distribution
* many improvements to the test verification suite.
* new inference providers: Ramalama, IBM WatsonX
* many improvements to the Playground UI

---

# v0.2.2
Published on: 2025-04-13T01:19:49Z

## Main changes

- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server
- OpenAI compatible inference API in progress (@bbrowning)
- Provider verifications (@ehhuang)
- Many updates and fixes to playground
- Several llama4 related fixes

---

# v0.2.1
Published on: 2025-04-05T23:13:00Z

---

# v0.2.0
Published on: 2025-04-05T19:04:29Z

## Llama 4 Support

Checkout more at https://www.llama.com

---
# v0.1.9
Published on: 2025-03-29T00:52:23Z

### Build and Test Agents
* Agents: Entire document context with attachments
* RAG: Documentation with sqlite-vec faiss comparison
* Getting started: Fixes to getting started notebook.

### Agent Evals and Model Customization
* (**New**) Post-training: Add nemo customizer

### Better Engineering
* Moved sqlite-vec to non-blocking calls
* Don't return a payload on file delete

---

# v0.1.8
Published on: 2025-03-24T01:28:50Z

# v0.1.8 Release Notes

### Build and Test Agents
* Safety: Integrated NVIDIA as a safety provider.
* VectorDB: Added Qdrant as an inline provider.
* Agents: Added support for multiple tool groups in agents.
* Agents: Simplified imports for Agents in client package

### Agent Evals and Model Customization
* Introduced DocVQA and IfEval benchmarks.

### Deploying and Monitoring Agents
* Introduced a Containerfile and image workflow for the Playground.
* Implemented support for Bearer (API Key) authentication.
* Added attribute-based access control for resources.
* Fixes on docker deployments: use --pull always and standardized the default port to 8321
* Deprecated: /v1/inspect/providers use /v1/providers/ instead

### Better Engineering
* Consolidated scripts under the ./scripts directory.
* Addressed mypy violations in various modules.
* Added Dependabot scans for Python dependencies.
* Implemented a scheduled workflow to update the changelog automatically.
* Enforced concurrency to reduce CI loads.

### New Contributors
* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650
* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671
* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698
* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8

---

# v0.1.7
Published on: 2025-03-14T22:30:51Z

## 0.1.7 Release Notes

### Build and Test Agents
* Inference: ImageType is now refactored to LlamaStackImageType
* Inference: Added tests to measure TTFT
* Inference: Bring back usage metrics
* Agents: Added endpoint for get agent, list agents and list sessions
* Agents: Automated conversion of type hints in client tool for lite llm format
* Agents: Deprecated ToolResponseMessage in agent.resume API
* Added Provider API for listing and inspecting provider info

### Agent Evals and Model Customization
* Eval: Added new eval benchmarks Math 500 and BFCL v3
* Deploy and Monitoring of Agents
* Telemetry: Fix tracing to work across coroutines

### Better Engineering
* Display code coverage for unit tests
* Updated call sites (inference, tool calls, agents) to move to async non blocking calls
* Unit tests also run on Python 3.11, 3.12, and 3.13
* Added ollama inference to Integration tests CI
* Improved documentation across examples, testing, CLI, updated providers table

---
# v0.1.6
Published on: 2025-03-08T04:35:08Z

## 0.1.6 Release Notes

### Build and Test Agents
* Inference: Fixed support for inline vllm provider
* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb)
* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples
* Agent: Unify tools and Python SDK Agents API
* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls
* Agent: Support python functions without @client_tool decorator as client tools
* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format
* VectorIO: MilvusDB support added

### Agent Evals and Model Customization
* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb)
* Eval: Documentation for eval, scoring, adding new benchmarks
* Eval: Distribution template to run benchmarks on llama & non-llama models
* Eval: Ability to register new custom LLM-as-judge scoring functions
* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details.

### Deploy and Monitoring of Agents
* Better support for different log levels across all components for better monitoring

### Better Engineering
* Enhance OpenAPI spec to include Error types across all APIs
* Moved all tests to /tests and created unit tests to run on each PR
* Removed all dependencies on llama-models repo

---

# v0.1.5.1
Published on: 2025-02-28T22:37:44Z

## 0.1.5.1 Release Notes
* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1

---

# v0.1.5
Published on: 2025-02-28T18:14:01Z

## 0.1.5 Release Notes
### Build Agents
* Inference: Support more non-llama models (openai, anthropic, gemini)
* Inference: Can use the provider's model name in addition to the HF alias
* Inference: Fixed issues with calling tools that weren't specified in the prompt
* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling
* Embeddings: Added support for Nemo retriever embedding models
* Tools: Added support for MCP tools in Ollama Distribution
* Distributions: Added new Groq distribution

### Customize Models
* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model

### Monitor agents
* More comprehensive logging of agent steps including client tools
* Telemetry inputs/outputs are now structured and queryable
* Ability to retrieve agents session, turn, step by ids

### Better Engineering
* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin
* Move most logging to use logger instead of prints
* Completed text /chat-completion and /completion tests

---

# v0.1.4
Published on: 2025-02-25T00:02:43Z

## v0.1.4 Release Notes
Here are the key changes coming as part of this release:

### Build and Test Agents
* Inference: Added support for non-llama models
* Inference: Added option to list all downloaded models and remove models
* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn
* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides
* Agent: Added logging for agent step start and completion times
* Agent: Added support for logging for tool execution metadata
* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs
* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults
* VectorIO: Improved performance of sqlite-vec using chunked writes

### Agent Evals and Model Customization
* Deprecated api /eval-tasks. Use /eval/benchmark instead
* Added CPU training support for TorchTune

### Deploy and Monitoring of Agents
* Consistent view of client and server tool calls in telemetry

### Better Engineering
* Made tests more data-driven for consistent evaluation
* Fixed documentation links and improved API reference generation
* Various small fixes for build scripts and system reliability

---
# v0.1.3
Published on: 2025-02-14T20:24:32Z

## v0.1.3 Release

Here are some key changes that are coming as part of this release.

### Build and Test Agents
Streamlined the initial development experience
- Added support for llama stack run --image-type venv
- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration
- vLLM improvements for tool calling and logprobs
- Better handling of sporadic code_interpreter tool calls

### Agent Evals
Better benchmarking and Agent performance assessment
- Renamed eval API /eval-task to /benchmarks
- Improved documentation and notebooks for RAG and evals

### Deploy and Monitoring of Agents
Improved production readiness
- Added usage metrics collection for chat completions
- CLI improvements for provider information
- Improved error handling and system reliability
- Better model endpoint handling and accessibility
- Improved signal handling on distro server

### Better Engineering
Infrastructure and code quality improvements
- Faster text-based chat completion tests
- Improved testing for non-streaming agent apis
- Standardized import formatting with ruff linter
- Added conventional commits standard
- Fixed documentation parsing issues

---

# v0.1.2
Published on: 2025-02-07T22:06:49Z

# TL;DR
- Several stabilizations to development flows after the switch to `uv`
- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops)
- Added automated rebuilds for ReadTheDocs
- Llama Stack server supports HTTPS
- Added system prompt overrides support
- Several bug fixes and improvements to documentation (check out the Kubernetes deployment guide by @terrytangyuan)

---

# v0.1.1
Published on: 2025-02-02T02:29:24Z

A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements.

---
# v0.1.0
Published on: 2025-01-24T17:47:47Z

We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor those agents with telemetry, and evaluate the agents with scoring functions.

## Context
GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open.

Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety.

With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and perform custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack's plugin architecture and prepackaged distributions, you can choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.

## Release
After iterating on the APIs for the last 3 months, today we're launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages (v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.

There are example standalone apps in llama-stack-apps.

## Key Features of this release

- **Unified API Layer**
  - Inference: Run LLM models
  - RAG: Store and retrieve knowledge for RAG
  - Agents: Build multi-step agentic workflows
  - Tools: Register tools that can be called by the agent
  - Safety: Apply content filtering and safety policies
  - Evaluation: Test model and agent quality
  - Telemetry: Collect and analyze usage data and complex agentic traces
  - Post Training (Coming Soon): Fine tune models for specific use cases

- **Rich Provider Ecosystem**
  - Local Development: Meta's Reference, Ollama
  - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras
  - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI
  - On-device: iOS and Android support

- **Built for Production**
  - Pre-packaged distributions for common deployment scenarios
  - Backwards compatibility across model versions
  - Comprehensive evaluation capabilities
  - Full observability and monitoring

- **Multiple developer interfaces**
  - CLI: Command line interface
  - Python SDK
  - Swift iOS SDK
  - Kotlin Android SDK

- **Sample llama stack applications**
  - Python
  - iOS
  - Android

---

# v0.1.0rc12
Published on: 2025-01-22T22:24:01Z

---

# v0.0.63
Published on: 2024-12-18T07:17:43Z

A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially.

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63

---

@@ -96,7 +96,7 @@ We have built-in functionality to run the supported open-benchmarks using llama-

 Spin up llama stack server with 'open-benchmark' template
 ```
-llama stack run llama_stack/distributions/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/config.yaml

 ```
@@ -85,7 +85,7 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:

 ## Configuring Default Embedding Models

-To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:
+To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your config.yaml like so:

 ```yaml
 vector_stores:
@@ -85,7 +85,7 @@ Features:
 - Context retrieval with token limits

 :::note[Default Configuration]
-By default, llama stack run.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers.
+By default, llama stack config.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers.
 :::

 ## Model Context Protocol (MCP)
@@ -337,7 +337,7 @@ uv pip install -e .
 7. Configure Llama Stack to use the provider:

 ```yaml
-# ~/.llama/run-byoa.yaml
+# ~/.llama/config.yaml
 version: "2"
 image_name: "llama-stack-api-weather"
 apis:
@@ -356,7 +356,7 @@ server:
 8. Run the server:

 ```bash
-llama stack run ~/.llama/run-byoa.yaml
+llama stack run ~/.llama/config.yaml
 ```

 9. Test the API:
@@ -47,7 +47,7 @@ We have built-in functionality to run the supported open-benckmarks using llama-

 Spin up llama stack server with 'open-benchmark' template
 ```bash
-llama stack run llama_stack/distributions/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/config.yaml
 ```

 #### Run eval CLI
@@ -14,7 +14,7 @@ This guide will walk you through the process of adding a new API provider to Llama Stack
 - Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
 - Determine the provider type ([Remote](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote) or [Inline](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline)). Remote providers make requests to external services, while inline providers execute implementation locally.
 - Add your provider to the appropriate [Registry](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/registry/). Specify pip dependencies necessary.
-- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `run.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
+- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.

 Here are some example PRs to help you get started:
@@ -133,7 +133,7 @@ For more information about the operator, see the [llama-stack-k8s-operator repository
 ### Step 4: Deploy Llama Stack Server using Operator

 Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources.
-You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).
+You can optionally override the default `config.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)).

 ```yaml
 cat <<EOF | kubectl apply -f -
@@ -155,7 +155,7 @@ spec:
            value: "4096"
          - name: VLLM_API_TOKEN
            value: "fake"
-        # Optional: override run.yaml from a ConfigMap using userConfig
+        # Optional: override config.yaml from a ConfigMap using userConfig
        userConfig:
          configMap:
            name: llama-stack-config
@@ -172,7 +172,7 @@ EOF
 - `server.distribution.image`: (Optional) Custom container image for non-supported distributions. Use this field when deploying a distribution that is not in the supported list. If specified, this takes precedence over `name`.
 - `server.containerSpec.port`: Port on which the Llama Stack server listens (default: 8321)
 - `server.containerSpec.env`: Environment variables to configure providers:
-- `server.userConfig`: (Optional) Override the default `run.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
+- `server.userConfig`: (Optional) Override the default `config.yaml` using a ConfigMap. See [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec).
 - `server.storage.size`: Size of the persistent volume for model and data storage
 - `server.storage.mountPath`: Where to mount the storage in the container
|
|
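The `server.userConfig` field above expects a ConfigMap holding your configuration. A minimal sketch of creating one is shown here; the data key `config.yaml` and the local file name are illustrative assumptions, not part of this change — check the operator's userConfig spec for the exact key it expects.

```bash
# Hypothetical: create the ConfigMap referenced by spec.server.userConfig.
# The "config.yaml" key and local file name are assumptions, not from this PR.
kubectl create configmap llama-stack-config \
  --from-file=config.yaml=./my-custom-config.yaml
```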
@@ -12,7 +12,7 @@ This guide walks you through inspecting existing distributions, customising thei
All first-party distributions live under `llama_stack/distributions/`. Each directory contains:

- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
- `run.yaml` – sample run configuration (when provided).
- `config.yaml` – sample run configuration (when provided).
- Documentation fragments that power this site.

Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
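A rough sketch of that duplicate-and-rename workflow is shown below; the source and target directory names are examples only.

```bash
# Illustrative only: start a new distribution from an existing one.
cp -R llama_stack/distributions/starter llama_stack/distributions/my-stack
# Then edit llama_stack/distributions/my-stack/build.yaml to adjust providers
# and dependencies to your requirements.
```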
@@ -35,7 +35,7 @@ docker build . \
Handy build arguments:

- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/config.yaml`).
- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
@@ -50,7 +50,7 @@ External providers live outside the main repository but can be bundled by pointi

1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
2. Update `build.yaml` with the directory and provider entries.
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/config.yaml` if you want to bake the config.

Example `build.yaml` excerpt for a custom Ollama provider:
@@ -142,7 +142,7 @@ If you prepared a custom run config, mount it into the container and reference i

```bash
docker run \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $(pwd)/run.yaml:/app/run.yaml \
  -v $(pwd)/config.yaml:/app/config.yaml \
  llama-stack:starter \
  /app/run.yaml
  /app/config.yaml
```
@@ -9,7 +9,7 @@ sidebar_position: 6
The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:

```{note}
The default `run.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
The default `config.yaml` files generated by templates are starting points for your configuration. For guidance on customizing these files for your specific needs, see [Customizing Your config.yaml Configuration](customizing_run_yaml.md).
```

```{dropdown} 👋 Click here for a Sample Configuration File
@@ -195,7 +195,7 @@ You can override environment variables at runtime by setting them in your shell
# Set environment variables in your shell
export API_KEY=sk-123
export BASE_URL=https://custom-api.com
llama stack run --config run.yaml
llama stack run --config config.yaml
```

#### Type Safety
@@ -1,16 +1,16 @@
---
title: Customizing run.yaml
title: Customizing config.yaml
description: Customizing run.yaml files for Llama Stack templates
description: Customizing config.yaml files for Llama Stack templates
sidebar_label: Customizing run.yaml
sidebar_label: Customizing config.yaml
sidebar_position: 4
---
# Customizing run.yaml Files
# Customizing config.yaml Files

The `run.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.
The `config.yaml` files generated by Llama Stack templates are **starting points** designed to be customized for your specific needs. They are not meant to be used as-is in production environments.

## Key Points

- **Templates are starting points**: Generated `run.yaml` files contain defaults for development/testing
- **Templates are starting points**: Generated `config.yaml` files contain defaults for development/testing
- **Customization expected**: Update URLs, credentials, models, and settings for your environment
- **Version control separately**: Keep customized configs in your own repository
- **Environment-specific**: Create different configurations for dev, staging, production
@@ -29,7 +29,7 @@ You can customize:
## Best Practices

- Use environment variables for secrets and environment-specific values
- Create separate `run.yaml` files for different environments (dev, staging, prod)
- Create separate `config.yaml` files for different environments (dev, staging, prod)
- Document your changes with comments
- Test configurations before deployment
- Keep your customized configs in version control
|
||||||
```
|
```
|
||||||
your-project/
|
your-project/
|
||||||
├── configs/
|
├── configs/
|
||||||
│ ├── dev-run.yaml
|
│ ├── dev-config.yaml
|
||||||
│ ├── prod-run.yaml
|
│ ├── prod-config.yaml
|
||||||
└── README.md
|
└── README.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ Then, you can access the APIs like `models` and `inference` on the client and ca
|
||||||
response = client.models.list()
|
response = client.models.list()
|
||||||
```
|
```
|
||||||
|
|
||||||
If you've created a [custom distribution](./building_distro), you can also use the run.yaml configuration file directly:
|
If you've created a [custom distribution](./building_distro), you can also use the config.yaml configuration file directly:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
client = LlamaStackAsLibraryClient(config_path)
|
client = LlamaStackAsLibraryClient(config_path)
|
||||||
|
|
|
||||||
|
|
@@ -15,7 +15,7 @@ This section provides an overview of the distributions available in Llama Stack.

- **[Available Distributions](./list_of_distributions.mdx)** - Complete list and comparison of all distributions
- **[Building Custom Distributions](./building_distro.mdx)** - Create your own distribution from scratch
- **[Customizing Configuration](./customizing_run_yaml.mdx)** - Customize run.yaml for your needs
- **[Customizing Configuration](./customizing_run_yaml.mdx)** - Customize config.yaml for your needs
- **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions
- **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code
- **[Configuration Reference](./configuration.mdx)** - Configuration file format details
@ -67,11 +67,11 @@ LLAMA_STACK_PORT=5001
|
||||||
docker run \
|
docker run \
|
||||||
-it \
|
-it \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v ./run.yaml:/root/my-run.yaml \
|
-v ./config.yaml:/root/my-config.yaml \
|
||||||
-e WATSONX_API_KEY=$WATSONX_API_KEY \
|
-e WATSONX_API_KEY=$WATSONX_API_KEY \
|
||||||
-e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
|
-e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
|
||||||
-e WATSONX_BASE_URL=$WATSONX_BASE_URL \
|
-e WATSONX_BASE_URL=$WATSONX_BASE_URL \
|
||||||
llamastack/distribution-watsonx \
|
llamastack/distribution-watsonx \
|
||||||
--config /root/my-run.yaml \
|
--config /root/my-config.yaml \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ The only difference vs. the `tgi` distribution is that it runs the Dell-TGI serv
|
||||||
```
|
```
|
||||||
$ cd distributions/dell-tgi/
|
$ cd distributions/dell-tgi/
|
||||||
$ ls
|
$ ls
|
||||||
compose.yaml README.md run.yaml
|
compose.yaml README.md config.yaml
|
||||||
$ docker compose up
|
$ docker compose up
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@@ -65,10 +65,10 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
#### Start Llama Stack server pointing to TGI server

```
docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
docker run --pull always --network host -it -p 8321:8321 -v ./config.yaml:/root/my-config.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-config.yaml
```

Make sure in your `run.yaml` file, your inference provider is pointing to the correct TGI server endpoint. E.g.
Make sure in your `config.yaml` file, your inference provider is pointing to the correct TGI server endpoint. E.g.
```
inference:
  - provider_id: tgi0
@ -152,14 +152,14 @@ docker run \
|
||||||
--pull always \
|
--pull always \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v $HOME/.llama:/root/.llama \
|
-v $HOME/.llama:/root/.llama \
|
||||||
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \
|
||||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
-e DEH_URL=$DEH_URL \
|
-e DEH_URL=$DEH_URL \
|
||||||
-e SAFETY_MODEL=$SAFETY_MODEL \
|
-e SAFETY_MODEL=$SAFETY_MODEL \
|
||||||
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||||
-e CHROMA_URL=$CHROMA_URL \
|
-e CHROMA_URL=$CHROMA_URL \
|
||||||
llamastack/distribution-dell \
|
llamastack/distribution-dell \
|
||||||
--config /root/my-run.yaml \
|
--config /root/my-config.yaml \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,8 +84,8 @@ docker run \
|
||||||
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Set the path to your custom run.yaml file
|
# Set the path to your custom config.yaml file
|
||||||
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
|
CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
|
||||||
LLAMA_STACK_PORT=8321
|
LLAMA_STACK_PORT=8321
|
||||||
|
|
||||||
docker run \
|
docker run \
|
||||||
|
|
@ -94,8 +94,8 @@ docker run \
|
||||||
--gpu all \
|
--gpu all \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v ~/.llama:/root/.llama \
|
-v ~/.llama:/root/.llama \
|
||||||
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
|
-v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
|
||||||
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
|
-e RUN_CONFIG_PATH=/app/custom-config.yaml \
|
||||||
llamastack/distribution-meta-reference-gpu \
|
llamastack/distribution-meta-reference-gpu \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
@ -103,7 +103,7 @@ docker run \
|
||||||
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
|
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
|
||||||
|
|
||||||
Available run configurations for this distribution:
|
Available run configurations for this distribution:
|
||||||
- `run.yaml`
|
- `config.yaml`
|
||||||
- `run-with-safety.yaml`
|
- `run-with-safety.yaml`
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
@ -113,7 +113,7 @@ Make sure you have the Llama Stack CLI available.
|
||||||
```bash
|
```bash
|
||||||
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
|
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
|
||||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||||
llama stack run distributions/meta-reference-gpu/run.yaml \
|
llama stack run distributions/meta-reference-gpu/config.yaml \
|
||||||
--port 8321
|
--port 8321
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -138,8 +138,8 @@ docker run \
|
||||||
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Set the path to your custom run.yaml file
|
# Set the path to your custom config.yaml file
|
||||||
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
|
CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
|
||||||
LLAMA_STACK_PORT=8321
|
LLAMA_STACK_PORT=8321
|
||||||
|
|
||||||
docker run \
|
docker run \
|
||||||
|
|
@ -147,8 +147,8 @@ docker run \
|
||||||
--pull always \
|
--pull always \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v ~/.llama:/root/.llama \
|
-v ~/.llama:/root/.llama \
|
||||||
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
|
-v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
|
||||||
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
|
-e RUN_CONFIG_PATH=/app/custom-config.yaml \
|
||||||
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||||
llamastack/distribution-nvidia \
|
llamastack/distribution-nvidia \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
|
|
@ -157,7 +157,7 @@ docker run \
|
||||||
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
|
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
|
||||||
|
|
||||||
Available run configurations for this distribution:
|
Available run configurations for this distribution:
|
||||||
- `run.yaml`
|
- `config.yaml`
|
||||||
- `run-with-safety.yaml`
|
- `run-with-safety.yaml`
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
@ -169,7 +169,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||||
llama stack list-deps nvidia | xargs -L1 uv pip install
|
llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||||
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
llama stack run ./run.yaml \
|
llama stack run ./config.yaml \
|
||||||
--port 8321
|
--port 8321
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ Note to start the container with Podman, you can do the same but replace `docker
|
||||||
`podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`
|
`podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`
|
||||||
with `host.containers.internal`.
|
with `host.containers.internal`.
|
||||||
|
|
||||||
The configuration YAML for the Ollama distribution is available at `distributions/ollama/run.yaml`.
|
The configuration YAML for the Ollama distribution is available at `distributions/ollama/config.yaml`.
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
Docker containers run in their own isolated network namespaces on Linux. To allow the container to communicate with services running on the host via `localhost`, you need `--network=host`. This makes the container use the host's network directly so it can connect to Ollama running on `localhost:11434`.
|
Docker containers run in their own isolated network namespaces on Linux. To allow the container to communicate with services running on the host via `localhost`, you need `--network=host`. This makes the container use the host's network directly so it can connect to Ollama running on `localhost:11434`.
|
||||||
|
|
|
||||||
|
|
@@ -222,22 +222,21 @@ def get_provider_spec() -> ProviderSpec:

[ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.

To install Llama Stack with this external provider a user can provide the following build.yaml:
To install Llama Stack with this external provider a user can provide the following config.yaml:
```yaml
# before (build.yaml)
version: 2
distribution_spec:
  description: Use (an external) Ramalama server for running LLM inference
  container_image: null
  providers:
    inference:
    - provider_type: remote::ramalama
      module: ramalama_stack==0.3.0a0
image_type: venv
image_name: null
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
```

```yaml
# after (config.yaml)
version: 2
image_name: ramalama
apis:
- inference
providers:
  inference:
  - provider_id: ramalama
    provider_type: remote::ramalama
    module: ramalama_stack==0.3.0a0
    config: {}
server:
  port: 8321
```

No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.
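A quick sketch of that flow, assuming a distribution named `ramalama` (the name is an example and may differ in your setup):

```bash
# Illustrative end-to-end flow; substitute your own distribution name.
llama stack list-deps ramalama | xargs -L1 uv pip install
llama stack run ramalama
```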

@@ -51,7 +51,7 @@ results = await client.vector_stores.search(

> **Note**: For detailed configuration examples and options, see [Configuration Examples](../openai_file_operations_support.md#configuration-examples) in the full documentation.

**Basic Setup**: Configure vector_io and files providers in your run.yaml
**Basic Setup**: Configure vector_io and files providers in your config.yaml
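A minimal sketch of that pairing is shown below; the provider ids, types, and empty `config` blocks are illustrative assumptions rather than settings taken from this change.

```yaml
# Hypothetical config.yaml excerpt pairing a vector store with a files provider.
providers:
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config: {}
  files:
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
```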

## Common Use Cases

@ -123,7 +123,7 @@ Connectors are MCP servers maintained and managed by the Responses API provider.
|
||||||
|
|
||||||
**Open Questions:**
|
**Open Questions:**
|
||||||
- Should Llama Stack include built-in support for some, all, or none of OpenAI's connectors?
|
- Should Llama Stack include built-in support for some, all, or none of OpenAI's connectors?
|
||||||
- Should there be a mechanism for administrators to add custom connectors via `run.yaml` or an API?
|
- Should there be a mechanism for administrators to add custom connectors via `config.yaml` or an API?
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -210,7 +210,7 @@ Metadata allows you to attach additional information to a response for your own
|
||||||
|
|
||||||
**Status:** Feature Request
|
**Status:** Feature Request
|
||||||
|
|
||||||
When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `run.yaml` or an administrative API.
|
When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `config.yaml` or an administrative API.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -355,7 +355,7 @@ The purpose of scoring function is to calculate the score for each example based
|
||||||
Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.
|
Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.
|
||||||
|
|
||||||
### Add new benchmark into template
|
### Add new benchmark into template
|
||||||
Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml)
|
Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/config.yaml)
|
||||||
|
|
||||||
Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
|
Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
|
||||||
- `benchmark_id`: identifier of the benchmark
|
- `benchmark_id`: identifier of the benchmark
|
||||||
|
|
@ -366,7 +366,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar
|
||||||
|
|
||||||
Spin up llama stack server with 'open-benchmark' templates
|
Spin up llama stack server with 'open-benchmark' templates
|
||||||
```bash
|
```bash
|
||||||
llama stack run llama_stack/distributions/open-benchmark/run.yaml
|
llama stack run llama_stack/distributions/open-benchmark/config.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
Run eval benchmark CLI with your new benchmark id
|
Run eval benchmark CLI with your new benchmark id
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ dependencies = [
|
||||||
"asyncpg", # for metadata store
|
"asyncpg", # for metadata store
|
||||||
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations
|
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations
|
||||||
"starlette>=0.49.1",
|
"starlette>=0.49.1",
|
||||||
|
"psycopg2-binary",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -11,15 +11,9 @@ from pathlib import Path
|
||||||
import yaml
|
import yaml
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_stack.cli.stack.utils import ImageType
|
|
||||||
from llama_stack.core.build import get_provider_dependencies
|
from llama_stack.core.build import get_provider_dependencies
|
||||||
from llama_stack.core.datatypes import (
|
from llama_stack.core.datatypes import Provider, StackConfig
|
||||||
BuildConfig,
|
|
||||||
BuildProvider,
|
|
||||||
DistributionSpec,
|
|
||||||
)
|
|
||||||
from llama_stack.core.distribution import get_provider_registry
|
from llama_stack.core.distribution import get_provider_registry
|
||||||
from llama_stack.core.stack import replace_env_vars
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack_api import Api
|
from llama_stack_api import Api
|
||||||
|
|
||||||
|
|
@ -70,9 +64,9 @@ def format_output_deps_only(
|
||||||
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||||
if args.config:
|
if args.config:
|
||||||
try:
|
try:
|
||||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
||||||
|
|
||||||
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
|
config_file = resolve_config_or_distro(args.config)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
cprint(
|
cprint(
|
||||||
f"Could not parse config file {args.config}: {e}",
|
f"Could not parse config file {args.config}: {e}",
|
||||||
|
|
@ -84,9 +78,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||||
with open(config_file) as f:
|
with open(config_file) as f:
|
||||||
try:
|
try:
|
||||||
contents = yaml.safe_load(f)
|
contents = yaml.safe_load(f)
|
||||||
contents = replace_env_vars(contents)
|
config = StackConfig(**contents)
|
||||||
build_config = BuildConfig(**contents)
|
|
||||||
build_config.image_type = "venv"
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
cprint(
|
cprint(
|
||||||
f"Could not parse config file {config_file}: {e}",
|
f"Could not parse config file {config_file}: {e}",
|
||||||
|
|
@ -95,7 +87,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
elif args.providers:
|
elif args.providers:
|
||||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
provider_list: dict[str, list[Provider]] = dict()
|
||||||
for api_provider in args.providers.split(","):
|
for api_provider in args.providers.split(","):
|
||||||
if "=" not in api_provider:
|
if "=" not in api_provider:
|
||||||
cprint(
|
cprint(
|
||||||
|
|
@ -114,8 +106,9 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if provider_type in providers_for_api:
|
if provider_type in providers_for_api:
|
||||||
provider = BuildProvider(
|
provider = Provider(
|
||||||
provider_type=provider_type,
|
provider_type=provider_type,
|
||||||
|
provider_id=provider_type.split("::")[1],
|
||||||
module=None,
|
module=None,
|
||||||
)
|
)
|
||||||
provider_list.setdefault(api, []).append(provider)
|
provider_list.setdefault(api, []).append(provider)
|
||||||
|
|
@ -126,20 +119,16 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
distribution_spec = DistributionSpec(
|
config = StackConfig(providers=provider_list, image_name="providers-run")
|
||||||
providers=provider_list,
|
|
||||||
description=",".join(args.providers),
|
|
||||||
)
|
|
||||||
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
|
||||||
|
|
||||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(config)
|
||||||
normal_deps += SERVER_DEPENDENCIES
|
normal_deps += SERVER_DEPENDENCIES
|
||||||
|
|
||||||
# Add external API dependencies
|
# Add external API dependencies
|
||||||
if build_config.external_apis_dir:
|
if config.external_apis_dir:
|
||||||
from llama_stack.core.external import load_external_apis
|
from llama_stack.core.external import load_external_apis
|
||||||
|
|
||||||
external_apis = load_external_apis(build_config)
|
external_apis = load_external_apis(config)
|
||||||
if external_apis:
|
if external_apis:
|
||||||
for _, api_spec in external_apis.items():
|
for _, api_spec in external_apis.items():
|
||||||
normal_deps.extend(api_spec.pip_packages)
|
normal_deps.extend(api_spec.pip_packages)
|
||||||
|
|
|
||||||
|
|
@ -59,19 +59,17 @@ class StackListBuilds(Subcommand):
|
||||||
print("No distributions found")
|
print("No distributions found")
|
||||||
return
|
return
|
||||||
|
|
||||||
headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
|
headers = ["Stack Name", "Source", "Path", "Config"]
|
||||||
rows = []
|
rows = []
|
||||||
for name, (path, source_type) in sorted(distributions.items()):
|
for name, (path, source_type) in sorted(distributions.items()):
|
||||||
row = [name, source_type, str(path)]
|
row = [name, source_type, str(path)]
|
||||||
# Check for build and run config files
|
# Check for config files
|
||||||
# For built-in distributions, configs are named build.yaml and run.yaml
|
# For built-in distributions, configs are named config.yaml
|
||||||
# For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
|
# For custom distributions, configs are named {name}-config.yaml
|
||||||
if source_type == "built-in":
|
if source_type == "built-in":
|
||||||
build_config = "Yes" if (path / "build.yaml").exists() else "No"
|
config = "Yes" if (path / "config.yaml").exists() else "No"
|
||||||
run_config = "Yes" if (path / "run.yaml").exists() else "No"
|
|
||||||
else:
|
else:
|
||||||
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
|
config = "Yes" if (path / f"{name}-config.yaml").exists() else "No"
|
||||||
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
|
row.extend([config])
|
||||||
row.extend([build_config, run_config])
|
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
print_table(rows, headers, separate_rows=True)
|
print_table(rows, headers, separate_rows=True)
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from termcolor import cprint
|
||||||
|
|
||||||
from llama_stack.cli.stack.utils import ImageType
|
from llama_stack.cli.stack.utils import ImageType
|
||||||
from llama_stack.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
|
from llama_stack.core.datatypes import Api, Provider, StackConfig
|
||||||
from llama_stack.core.distribution import get_provider_registry
|
from llama_stack.core.distribution import get_provider_registry
|
||||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||||
from llama_stack.core.storage.datatypes import (
|
from llama_stack.core.storage.datatypes import (
|
||||||
|
|
@ -30,7 +30,7 @@ from llama_stack.core.storage.datatypes import (
|
||||||
StorageConfig,
|
StorageConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
||||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||||
from llama_stack.log import LoggingConfig, get_logger
|
from llama_stack.log import LoggingConfig, get_logger
|
||||||
|
|
||||||
|
|
@ -108,9 +108,9 @@ class StackRun(Subcommand):
|
||||||
|
|
||||||
if args.config:
|
if args.config:
|
||||||
try:
|
try:
|
||||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
|
||||||
|
|
||||||
config_file = resolve_config_or_distro(args.config, Mode.RUN)
|
config_file = resolve_config_or_distro(args.config)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
self.parser.error(str(e))
|
self.parser.error(str(e))
|
||||||
elif args.providers:
|
elif args.providers:
|
||||||
|
|
@ -156,7 +156,7 @@ class StackRun(Subcommand):
|
||||||
|
|
||||||
# Write config to disk in providers-run directory
|
# Write config to disk in providers-run directory
|
||||||
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
||||||
config_file = distro_dir / "run.yaml"
|
config_file = distro_dir / "config.yaml"
|
||||||
|
|
||||||
logger.info(f"Writing generated config to: {config_file}")
|
logger.info(f"Writing generated config to: {config_file}")
|
||||||
with open(config_file, "w") as f:
|
with open(config_file, "w") as f:
|
||||||
|
|
@ -187,14 +187,14 @@ class StackRun(Subcommand):
|
||||||
if not config_file:
|
if not config_file:
|
||||||
self.parser.error("Config file is required")
|
self.parser.error("Config file is required")
|
||||||
|
|
||||||
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
|
config_file = resolve_config_or_distro(str(config_file))
|
||||||
with open(config_file) as fp:
|
with open(config_file) as fp:
|
||||||
config_contents = yaml.safe_load(fp)
|
config_contents = yaml.safe_load(fp)
|
||||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||||
logger_config = LoggingConfig(**cfg)
|
logger_config = LoggingConfig(**cfg)
|
||||||
else:
|
else:
|
||||||
logger_config = None
|
logger_config = None
|
||||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
config = StackConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||||
|
|
||||||
port = args.port or config.server.port
|
port = args.port or config.server.port
|
||||||
host = config.server.host or ["::", "0.0.0.0"]
|
host = config.server.host or ["::", "0.0.0.0"]
|
||||||
|
|
@ -318,7 +318,7 @@ class StackRun(Subcommand):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
return StackRunConfig(
|
return StackConfig(
|
||||||
image_name="providers-run",
|
image_name="providers-run",
|
||||||
apis=apis,
|
apis=apis,
|
||||||
providers=providers,
|
providers=providers,
|
||||||
|
|
|
||||||
|
|
@ -4,36 +4,9 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from functools import lru_cache
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import yaml
|
|
||||||
from termcolor import cprint
|
|
||||||
|
|
||||||
from llama_stack.core.datatypes import (
|
|
||||||
BuildConfig,
|
|
||||||
Provider,
|
|
||||||
StackRunConfig,
|
|
||||||
StorageConfig,
|
|
||||||
)
|
|
||||||
from llama_stack.core.distribution import get_provider_registry
|
|
||||||
from llama_stack.core.resolver import InvalidProviderError
|
|
||||||
from llama_stack.core.storage.datatypes import (
|
|
||||||
InferenceStoreReference,
|
|
||||||
KVStoreReference,
|
|
||||||
ServerStoresConfig,
|
|
||||||
SqliteKVStoreConfig,
|
|
||||||
SqliteSqlStoreConfig,
|
|
||||||
SqlStoreReference,
|
|
||||||
)
|
|
||||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
|
||||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
||||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
|
||||||
from llama_stack_api import Api
|
|
||||||
|
|
||||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -49,103 +22,3 @@ def print_subcommand_description(parser, subparsers):
|
||||||
description = subcommand.description
|
description = subcommand.description
|
||||||
description_text += f" {name:<21} {description}\n"
|
description_text += f" {name:<21} {description}\n"
|
||||||
parser.epilog = description_text
|
parser.epilog = description_text
|
||||||
|
|
||||||
|
|
||||||
def generate_run_config(
|
|
||||||
build_config: BuildConfig,
|
|
||||||
build_dir: Path,
|
|
||||||
image_name: str,
|
|
||||||
) -> Path:
|
|
||||||
"""
|
|
||||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
|
||||||
"""
|
|
||||||
apis = list(build_config.distribution_spec.providers.keys())
|
|
||||||
distro_dir = DISTRIBS_BASE_DIR / image_name
|
|
||||||
run_config = StackRunConfig(
|
|
||||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
|
||||||
image_name=image_name,
|
|
||||||
apis=apis,
|
|
||||||
providers={},
|
|
||||||
storage=StorageConfig(
|
|
||||||
backends={
|
|
||||||
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
|
|
||||||
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
|
|
||||||
},
|
|
||||||
stores=ServerStoresConfig(
|
|
||||||
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
|
||||||
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
|
||||||
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
external_providers_dir=build_config.external_providers_dir
|
|
||||||
if build_config.external_providers_dir
|
|
||||||
else EXTERNAL_PROVIDERS_DIR,
|
|
||||||
)
|
|
||||||
# build providers dict
|
|
||||||
provider_registry = get_provider_registry(build_config)
|
|
||||||
for api in apis:
|
|
||||||
run_config.providers[api] = []
|
|
||||||
providers = build_config.distribution_spec.providers[api]
|
|
||||||
|
|
||||||
for provider in providers:
|
|
||||||
pid = provider.provider_type.split("::")[-1]
|
|
||||||
|
|
||||||
p = provider_registry[Api(api)][provider.provider_type]
|
|
||||||
if p.deprecation_error:
|
|
||||||
raise InvalidProviderError(p.deprecation_error)
|
|
||||||
|
|
||||||
try:
|
|
||||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
|
||||||
except (ModuleNotFoundError, ValueError) as exc:
|
|
||||||
# HACK ALERT:
|
|
||||||
# This code executes after building is done, the import cannot work since the
|
|
||||||
# package is either available in the venv or container - not available on the host.
|
|
||||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
|
||||||
# external
|
|
||||||
cprint(
|
|
||||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
|
||||||
color="yellow",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
# Set config_type to None to avoid UnboundLocalError
|
|
||||||
config_type = None
|
|
||||||
|
|
||||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
|
||||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
|
||||||
else:
|
|
||||||
config = {}
|
|
||||||
|
|
||||||
p_spec = Provider(
|
|
||||||
provider_id=pid,
|
|
||||||
provider_type=provider.provider_type,
|
|
||||||
config=config,
|
|
||||||
module=provider.module,
|
|
||||||
)
|
|
||||||
run_config.providers[api].append(p_spec)
|
|
||||||
|
|
||||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
|
||||||
|
|
||||||
with open(run_config_file, "w") as f:
|
|
||||||
to_write = json.loads(run_config.model_dump_json())
|
|
||||||
f.write(yaml.dump(to_write, sort_keys=False))
|
|
||||||
|
|
||||||
# Only print this message for non-container builds since it will be displayed before the
|
|
||||||
# container is built
|
|
||||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
|
||||||
# makes sense to display this message
|
|
||||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
|
||||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
|
||||||
return run_config_file
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
|
||||||
def available_templates_specs() -> dict[str, BuildConfig]:
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
template_specs = {}
|
|
||||||
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
|
||||||
template_name = p.parent.name
|
|
||||||
with open(p) as f:
|
|
||||||
build_config = BuildConfig(**yaml.safe_load(f))
|
|
||||||
template_specs[template_name] = build_config
|
|
||||||
return template_specs
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import sys
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_stack.core.datatypes import BuildConfig
|
from llama_stack.core.datatypes import StackConfig
|
||||||
from llama_stack.core.distribution import get_provider_registry
|
from llama_stack.core.distribution import get_provider_registry
|
||||||
from llama_stack.distributions.template import DistributionTemplate
|
from llama_stack.distributions.template import DistributionTemplate
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
|
@ -36,18 +36,17 @@ class ApiInput(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
def get_provider_dependencies(
|
def get_provider_dependencies(
|
||||||
config: BuildConfig | DistributionTemplate,
|
config: StackConfig,
|
||||||
) -> tuple[list[str], list[str], list[str]]:
|
) -> tuple[list[str], list[str], list[str]]:
|
||||||
"""Get normal and special dependencies from provider configuration."""
|
"""Get normal and special dependencies from provider configuration."""
|
||||||
if isinstance(config, DistributionTemplate):
|
if isinstance(config, DistributionTemplate):
|
||||||
config = config.build_config()
|
config = config.build_config()
|
||||||
|
|
||||||
providers = config.distribution_spec.providers
|
providers = config.providers
|
||||||
additional_pip_packages = config.additional_pip_packages
|
|
||||||
|
|
||||||
deps = []
|
deps = []
|
||||||
external_provider_deps = []
|
external_provider_deps = []
|
||||||
registry = get_provider_registry(config)
|
registry = get_provider_registry(config=config, listing=True)
|
||||||
for api_str, provider_or_providers in providers.items():
|
for api_str, provider_or_providers in providers.items():
|
||||||
providers_for_api = registry[Api(api_str)]
|
providers_for_api = registry[Api(api_str)]
|
||||||
|
|
||||||
|
|
@ -81,12 +80,10 @@ def get_provider_dependencies(
|
||||||
else:
|
else:
|
||||||
normal_deps.append(package)
|
normal_deps.append(package)
|
||||||
|
|
||||||
normal_deps.extend(additional_pip_packages or [])
|
|
||||||
|
|
||||||
return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps))
|
return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps))
|
||||||
|
|
||||||
|
|
||||||
def print_pip_install_help(config: BuildConfig):
|
def print_pip_install_help(config: StackConfig):
|
||||||
normal_deps, special_deps, _ = get_provider_dependencies(config)
|
normal_deps, special_deps, _ = get_provider_dependencies(config)
|
||||||
|
|
||||||
cprint(
|
cprint(
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from llama_stack.core.datatypes import (
|
||||||
LLAMA_STACK_RUN_CONFIG_VERSION,
|
LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||||
DistributionSpec,
|
DistributionSpec,
|
||||||
Provider,
|
Provider,
|
||||||
StackRunConfig,
|
StackConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.core.distribution import (
|
from llama_stack.core.distribution import (
|
||||||
builtin_automatically_routed_apis,
|
builtin_automatically_routed_apis,
|
||||||
|
|
@ -44,7 +44,7 @@ def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provi
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec) -> StackRunConfig:
|
def configure_api_providers(config: StackConfig, build_spec: DistributionSpec) -> StackConfig:
|
||||||
is_nux = len(config.providers) == 0
|
is_nux = len(config.providers) == 0
|
||||||
|
|
||||||
if is_nux:
|
if is_nux:
|
||||||
|
|
@ -192,7 +192,7 @@ def upgrade_from_routing_table(
|
||||||
return config_dict
|
return config_dict
|
||||||
|
|
||||||
|
|
||||||
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
|
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackConfig:
|
||||||
if "routing_table" in config_dict:
|
if "routing_table" in config_dict:
|
||||||
logger.info("Upgrading config...")
|
logger.info("Upgrading config...")
|
||||||
config_dict = upgrade_from_routing_table(config_dict)
|
config_dict = upgrade_from_routing_table(config_dict)
|
||||||
|
|
@ -200,4 +200,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
|
||||||
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
|
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||||
|
|
||||||
processed_config_dict = replace_env_vars(config_dict)
|
processed_config_dict = replace_env_vars(config_dict)
|
||||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
return StackConfig(**cast_image_name_to_string(processed_config_dict))
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from typing import Any, Literal
|
||||||
|
|
||||||
from pydantic import BaseModel, TypeAdapter
|
from pydantic import BaseModel, TypeAdapter
|
||||||
|
|
||||||
from llama_stack.core.datatypes import AccessRule, StackRunConfig
|
from llama_stack.core.datatypes import AccessRule, StackConfig
|
||||||
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||||
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
|
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
|
@ -36,7 +36,7 @@ class ConversationServiceConfig(BaseModel):
|
||||||
:param policy: Access control rules
|
:param policy: Access control rules
|
||||||
"""
|
"""
|
||||||
|
|
||||||
run_config: StackRunConfig
|
config: StackConfig
|
||||||
policy: list[AccessRule] = []
|
policy: list[AccessRule] = []
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -56,7 +56,7 @@ class ConversationServiceImpl(Conversations):
|
||||||
self.policy = config.policy
|
self.policy = config.policy
|
||||||
|
|
||||||
# Use conversations store reference from run config
|
# Use conversations store reference from run config
|
||||||
conversations_ref = config.run_config.storage.stores.conversations
|
conversations_ref = config.config.storage.stores.conversations
|
||||||
if not conversations_ref:
|
if not conversations_ref:
|
||||||
raise ValueError("storage.stores.conversations must be configured in run config")
|
raise ValueError("storage.stores.conversations must be configured in run config")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -474,7 +474,7 @@ class ServerConfig(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class StackRunConfig(BaseModel):
|
class StackConfig(BaseModel):
|
||||||
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
|
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||||
|
|
||||||
image_name: str = Field(
|
image_name: str = Field(
|
||||||
|
|
@ -501,6 +501,7 @@ can be instantiated multiple times (with different configs) if necessary.
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
storage: StorageConfig = Field(
|
storage: StorageConfig = Field(
|
||||||
|
default_factory=StorageConfig,
|
||||||
description="Catalog of named storage backends and references available to the stack",
|
description="Catalog of named storage backends and references available to the stack",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -546,7 +547,7 @@ can be instantiated multiple times (with different configs) if necessary.
|
||||||
return v
|
return v
|
||||||
|
|
||||||
@model_validator(mode="after")
|
@model_validator(mode="after")
|
||||||
def validate_server_stores(self) -> "StackRunConfig":
|
def validate_server_stores(self) -> "StackConfig":
|
||||||
backend_map = self.storage.backends
|
backend_map = self.storage.backends
|
||||||
stores = self.storage.stores
|
stores = self.storage.stores
|
||||||
kv_backends = {
|
kv_backends = {
|
||||||
|
|
@ -588,39 +589,3 @@ can be instantiated multiple times (with different configs) if necessary.
|
||||||
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
|
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
|
||||||
_ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
|
_ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
class BuildConfig(BaseModel):
|
|
||||||
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
|
|
||||||
|
|
||||||
distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
|
|
||||||
image_type: str = Field(
|
|
||||||
default="venv",
|
|
||||||
description="Type of package to build (container | venv)",
|
|
||||||
)
|
|
||||||
image_name: str | None = Field(
|
|
||||||
default=None,
|
|
||||||
description="Name of the distribution to build",
|
|
||||||
)
|
|
||||||
external_providers_dir: Path | None = Field(
|
|
||||||
default=None,
|
|
||||||
description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
|
|
||||||
"pip_packages MUST contain the provider package name.",
|
|
||||||
)
|
|
||||||
additional_pip_packages: list[str] = Field(
|
|
||||||
default_factory=list,
|
|
||||||
description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
|
|
||||||
)
|
|
||||||
external_apis_dir: Path | None = Field(
|
|
||||||
default=None,
|
|
||||||
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
|
|
||||||
)
|
|
||||||
|
|
||||||
@field_validator("external_providers_dir")
|
|
||||||
@classmethod
|
|
||||||
def validate_external_providers_dir(cls, v):
|
|
||||||
if v is None:
|
|
||||||
return None
|
|
||||||
if isinstance(v, str):
|
|
||||||
return Path(v)
|
|
||||||
return v
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from typing import Any
import yaml
from pydantic import BaseModel

from llama_stack.core.datatypes import BuildConfig, DistributionSpec
from llama_stack.core.datatypes import StackConfig
from llama_stack.core.external import load_external_apis
from llama_stack.log import get_logger
from llama_stack_api import (
@ -85,7 +85,9 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam
    return spec


def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
def get_provider_registry(
    config: StackConfig | None = None, listing: bool = False
) -> dict[Api, dict[str, ProviderSpec]]:
    """Get the provider registry, optionally including external providers.

    This function loads both built-in providers and external providers from YAML files or from their provided modules.
@ -109,13 +111,13 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
            safety/
                llama-guard.yaml

    This method is overloaded in that it can be called from a variety of places: during build, during run, during stack construction.
    This method is overloaded in that it can be called from a variety of places: during list-deps, during run, during stack construction.
    So when building external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
    So when listing external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
    There is special handling for all of the potential cases this method can be called from.

    Args:
        config: Optional object containing the external providers directory path
        building: Optional bool delineating whether or not this is being called from a build process
        listing: Optional bool delineating whether or not this is being called from a list-deps process

    Returns:
        A dictionary mapping APIs to their available providers
@ -161,7 +163,7 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
        registry = get_external_providers_from_module(
            registry=registry,
            config=config,
            building=(isinstance(config, BuildConfig) or isinstance(config, DistributionSpec)),
            listing=listing,
        )

    return registry
@ -220,13 +222,10 @@ def get_external_providers_from_dir(


def get_external_providers_from_module(
    registry: dict[Api, dict[str, ProviderSpec]], config, building: bool
    registry: dict[Api, dict[str, ProviderSpec]], config, listing: bool
) -> dict[Api, dict[str, ProviderSpec]]:
    provider_list = None
    if isinstance(config, BuildConfig):
        provider_list = config.distribution_spec.providers.items()
    else:
        provider_list = config.providers.items()
    provider_list = config.providers.items()
    if provider_list is None:
        logger.warning("Could not get list of providers from config")
        return registry
@ -236,14 +235,14 @@ def get_external_providers_from_module(
            continue
        # get provider using module
        try:
            if not building:
            if not listing:
                package_name = provider.module.split("==")[0]
                module = importlib.import_module(f"{package_name}.provider")
                # if config class is wrong you will get an error saying module could not be imported
                spec = module.get_provider_spec()
            else:
                # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
                # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon list-deps and run
                # in the case we are building we CANNOT import this module of course because it has not been installed.
                # in the case we are listing we CANNOT import this module of course because it has not been installed.
                spec = ProviderSpec(
                    api=Api(provider_api),
                    provider_type=provider.provider_type,
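For orientation, a minimal sketch of how the new `listing` flag is meant to be exercised; the config path below is an illustrative assumption, not part of this change:

```python
import yaml

from llama_stack.core.datatypes import StackConfig
from llama_stack.core.distribution import get_provider_registry

# Assumed example path; any StackConfig loaded from a config.yaml works here.
with open("config.yaml") as f:
    config = StackConfig(**yaml.safe_load(f))

# Run path: external provider modules are importable, full specs are loaded.
registry = get_provider_registry(config)

# list-deps path: modules may not be installed yet, so only partial
# ProviderSpec stubs are produced and nothing is imported.
registry = get_provider_registry(config, listing=True)
```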
@ -7,14 +7,14 @@

import yaml

from llama_stack.core.datatypes import BuildConfig, StackRunConfig
from llama_stack.core.datatypes import StackConfig
from llama_stack.log import get_logger
from llama_stack_api import Api, ExternalApiSpec

logger = get_logger(name=__name__, category="core")


def load_external_apis(config: StackRunConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]:
def load_external_apis(config: StackConfig | None) -> dict[Api, ExternalApiSpec]:
    """Load external API specifications from the configured directory.

    Args:
@ -8,7 +8,7 @@ from importlib.metadata import version

from pydantic import BaseModel

from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.datatypes import StackConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack_api import (
@ -22,7 +22,7 @@ from llama_stack_api import (


class DistributionInspectConfig(BaseModel):
    run_config: StackRunConfig
    config: StackConfig


async def get_provider_impl(config, deps):
@ -33,14 +33,14 @@ async def get_provider_impl(config, deps):

class DistributionInspectImpl(Inspect):
    def __init__(self, config: DistributionInspectConfig, deps):
        self.config = config
        self.stack_config = config.config
        self.deps = deps

    async def initialize(self) -> None:
        pass

    async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
        run_config: StackRunConfig = self.config.run_config
        config: StackConfig = self.stack_config

        # Helper function to determine if a route should be included based on api_filter
        def should_include_route(webmethod) -> bool:
@ -55,7 +55,7 @@ class DistributionInspectImpl(Inspect):
                return not webmethod.deprecated and webmethod.level == api_filter

        ret = []
        external_apis = load_external_apis(run_config)
        external_apis = load_external_apis(config)
        all_endpoints = get_all_api_routes(external_apis)
        for api, endpoints in all_endpoints.items():
            # Always include provider and inspect APIs, filter others based on run config
@ -72,7 +72,7 @@ class DistributionInspectImpl(Inspect):
                    ]
                )
            else:
                providers = run_config.providers.get(api.value, [])
                providers = config.providers.get(api.value, [])
                if providers:  # Only process if there are providers for this API
                    ret.extend(
                        [
@ -41,7 +41,6 @@ from termcolor import cprint

from llama_stack.core.build import print_pip_install_help
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
@ -257,20 +256,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                file=sys.stderr,
            )
            if self.config_path_or_distro_name.endswith(".yaml"):
                providers: dict[str, list[BuildProvider]] = {}
                for api, run_providers in self.config.providers.items():
                    for provider in run_providers:
                        providers.setdefault(api, []).append(
                            BuildProvider(provider_type=provider.provider_type, module=provider.module)
                        )
                providers = dict(providers)
                build_config = BuildConfig(
                    distribution_spec=DistributionSpec(
                        providers=providers,
                    ),
                    external_providers_dir=self.config.external_providers_dir,
                )
                print_pip_install_help(build_config)
                print_pip_install_help(self.config)
            else:
                prefix = "!" if in_notebook() else ""
                cprint(
@ -9,7 +9,7 @@ from typing import Any

from pydantic import BaseModel

from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.datatypes import StackConfig
from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
from llama_stack_api import ListPromptsResponse, Prompt, Prompts

@ -20,7 +20,7 @@ class PromptServiceConfig(BaseModel):
    :param run_config: Stack run configuration containing distribution info
    """

    run_config: StackRunConfig
    config: StackConfig


async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
@ -34,13 +34,13 @@ class PromptServiceImpl(Prompts):
    """Built-in prompt service implementation using KVStore."""

    def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
        self.config = config
        self.stack_config = config.config
        self.deps = deps
        self.kvstore: KVStore

    async def initialize(self) -> None:
        # Use prompts store reference from run config
        prompts_ref = self.config.run_config.storage.stores.prompts
        prompts_ref = self.stack_config.storage.stores.prompts
        if not prompts_ref:
            raise ValueError("storage.stores.prompts must be configured in run config")
        self.kvstore = await kvstore_impl(prompts_ref)
@ -12,14 +12,14 @@ from pydantic import BaseModel
from llama_stack.log import get_logger
from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers

from .datatypes import StackRunConfig
from .datatypes import StackConfig
from .utils.config import redact_sensitive_fields

logger = get_logger(name=__name__, category="core")


class ProviderImplConfig(BaseModel):
    run_config: StackRunConfig
    config: StackConfig


async def get_provider_impl(config, deps):
@ -30,7 +30,7 @@ async def get_provider_impl(config, deps):

class ProviderImpl(Providers):
    def __init__(self, config, deps):
        self.config = config
        self.stack_config = config.config
        self.deps = deps

    async def initialize(self) -> None:
@ -41,8 +41,8 @@ class ProviderImpl(Providers):
        pass

    async def list_providers(self) -> ListProvidersResponse:
        run_config = self.config.run_config
        run_config = self.stack_config
        safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
        safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump()))
        providers_health = await self.get_providers_health()
        ret = []
        for api, providers in safe_config.providers.items():
@ -14,7 +14,7 @@ from llama_stack.core.datatypes import (
    AutoRoutedProviderSpec,
    Provider,
    RoutingTableProviderSpec,
    StackRunConfig,
    StackConfig,
)
from llama_stack.core.distribution import builtin_automatically_routed_apis
from llama_stack.core.external import load_external_apis
@ -147,7 +147,7 @@ ProviderRegistry = dict[Api, dict[str, ProviderSpec]]


async def resolve_impls(
    run_config: StackRunConfig,
    run_config: StackConfig,
    provider_registry: ProviderRegistry,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
@ -217,7 +217,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,


def validate_and_prepare_providers(
    run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api]
    run_config: StackConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api]
) -> dict[str, dict[str, ProviderWithSpec]]:
    """Validates providers, handles deprecations, and organizes them into a spec dictionary."""
    providers_with_specs: dict[str, dict[str, ProviderWithSpec]] = {}
@ -261,7 +261,7 @@ def validate_provider(provider: Provider, api: Api, provider_registry: ProviderR


def sort_providers_by_deps(
    providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackRunConfig
    providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackConfig
) -> list[tuple[str, ProviderWithSpec]]:
    """Sorts providers based on their dependencies."""
    sorted_providers: list[tuple[str, ProviderWithSpec]] = topological_sort(
@ -278,7 +278,7 @@ async def instantiate_providers(
    sorted_providers: list[tuple[str, ProviderWithSpec]],
    router_apis: set[Api],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    run_config: StackConfig,
    policy: list[AccessRule],
    internal_impls: dict[Api, Any] | None = None,
) -> dict[Api, Any]:
@ -357,7 +357,7 @@ async def instantiate_provider(
    deps: dict[Api, Any],
    inner_impls: dict[str, Any],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    run_config: StackConfig,
    policy: list[AccessRule],
):
    provider_spec = provider.spec
@ -9,8 +9,8 @@ from typing import Any
from llama_stack.core.datatypes import (
    AccessRule,
    RoutedProtocol,
    StackConfig,
)
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack_api import Api, RoutingTable
@ -51,7 +51,7 @@ async def get_routing_table_impl(


async def get_auto_router_impl(
    api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackRunConfig, policy: list[AccessRule]
    api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackConfig, policy: list[AccessRule]
) -> Any:
    from .datasets import DatasetIORouter
    from .eval_scoring import EvalRouter, ScoringRouter
@ -224,7 +224,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        existing_models = await self.get_all_with_type("model")

        # we may have an alias for the model registered by the user (or during initialization
        # from run.yaml) that we need to keep track of
        # from config.yaml) that we need to keep track of
        model_ids = {}
        for model in existing_models:
            if model.provider_id != provider_id:
@ -34,7 +34,7 @@ from pydantic import BaseModel, ValidationError
from llama_stack.core.access_control.access_control import AccessDeniedError
from llama_stack.core.datatypes import (
    AuthenticationRequiredError,
    StackRunConfig,
    StackConfig,
    process_cors_config,
)
from llama_stack.core.distribution import builtin_automatically_routed_apis
@ -51,7 +51,7 @@ from llama_stack.core.stack import (
    replace_env_vars,
)
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.config_resolution import resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import LoggingConfig, get_logger
from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
@ -146,7 +146,7 @@ class StackApp(FastAPI):
    start background tasks (e.g. refresh model registry periodically) from the lifespan context manager.
    """

    def __init__(self, config: StackRunConfig, *args, **kwargs):
    def __init__(self, config: StackConfig, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.stack: Stack = Stack(config)

@ -368,7 +368,7 @@ def create_app() -> StackApp:
    if config_file is None:
        raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

    config_file = resolve_config_or_distro(config_file, Mode.RUN)
    config_file = resolve_config_or_distro(config_file)

    # Load and process configuration
    logger_config = None
@ -379,7 +379,7 @@ def create_app() -> StackApp:
    logger = get_logger(name=__name__, category="core::server", config=logger_config)

    config = replace_env_vars(config_contents)
    config = StackRunConfig(**cast_image_name_to_string(config))
    config = StackConfig(**cast_image_name_to_string(config))

    _log_run_config(run_config=config)

@ -494,7 +494,7 @@ def create_app() -> StackApp:
    return app


def _log_run_config(run_config: StackRunConfig):
def _log_run_config(run_config: StackConfig):
    """Logs the run config with redacted fields and disabled providers removed."""
    logger.info("Run configuration:")
    safe_config = redact_sensitive_fields(run_config.model_dump(mode="json"))
@ -14,7 +14,7 @@ from typing import Any
import yaml

from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
@ -108,7 +108,7 @@ REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None


async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
    for rsrc, api, register_method, list_method in RESOURCES:
        objects = getattr(run_config.registered_resources, rsrc)
        if api not in impls:
@ -341,7 +341,7 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
    return config_dict


def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None:
    """Add internal implementations (inspect and providers) to the implementations dictionary.

    Args:
@ -349,31 +349,31 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
        run_config: Stack run configuration
    """
    inspect_impl = DistributionInspectImpl(
        DistributionInspectConfig(run_config=run_config),
        DistributionInspectConfig(config=config),
        deps=impls,
    )
    impls[Api.inspect] = inspect_impl

    providers_impl = ProviderImpl(
        ProviderImplConfig(run_config=run_config),
        ProviderImplConfig(config=config),
        deps=impls,
    )
    impls[Api.providers] = providers_impl

    prompts_impl = PromptServiceImpl(
        PromptServiceConfig(run_config=run_config),
        PromptServiceConfig(config=config),
        deps=impls,
    )
    impls[Api.prompts] = prompts_impl

    conversations_impl = ConversationServiceImpl(
        ConversationServiceConfig(run_config=run_config),
        ConversationServiceConfig(config=config),
        deps=impls,
    )
    impls[Api.conversations] = conversations_impl


def _initialize_storage(run_config: StackRunConfig):
def _initialize_storage(run_config: StackConfig):
    kv_backends: dict[str, StorageBackendConfig] = {}
    sql_backends: dict[str, StorageBackendConfig] = {}
    for backend_name, backend_config in run_config.storage.backends.items():
@ -393,7 +393,7 @@ def _initialize_storage(run_config: StackRunConfig):


class Stack:
    def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
    def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None):
        self.run_config = run_config
        self.provider_registry = provider_registry
        self.impls = None
@ -499,20 +499,20 @@ async def refresh_registry_task(impls: dict[Api, Any]):
        await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)


def get_stack_run_config_from_distro(distro: str) -> StackRunConfig:
def get_stack_run_config_from_distro(distro: str) -> StackConfig:
    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml"
    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"

    with importlib.resources.as_file(distro_path) as path:
        if not path.exists():
            raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
        run_config = yaml.safe_load(path.open())

    return StackRunConfig(**replace_env_vars(run_config))
    return StackConfig(**replace_env_vars(run_config))


def run_config_from_adhoc_config_spec(
    adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None
) -> StackRunConfig:
) -> StackConfig:
    """
    Create an adhoc distribution from a list of API providers.

@ -552,7 +552,7 @@ def run_config_from_adhoc_config_spec(
                config=provider_config,
            )
        ]
    config = StackRunConfig(
    config = StackConfig(
        image_name="distro-test",
        apis=list(provider_configs_by_api.keys()),
        providers=provider_configs_by_api,
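A short usage sketch for the loader above; the distribution name is an example and the attribute access assumes the `StackConfig` fields shown elsewhere in this change:

```python
from llama_stack.core.stack import get_stack_run_config_from_distro

# Reads distributions/<distro>/config.yaml from the installed package,
# substitutes environment variables, and returns a StackConfig.
config = get_stack_run_config_from_distro("nvidia")
print(config.image_name, sorted(config.providers))
```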
@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import StrEnum
from pathlib import Path

from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
@ -16,21 +15,14 @@ logger = get_logger(name=__name__, category="core")
DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"


class Mode(StrEnum):
    RUN = "run"
    BUILD = "build"


def resolve_config_or_distro(
    config_or_distro: str,
    mode: Mode = Mode.RUN,
) -> Path:
    """
    Resolve a config/distro argument to a concrete config file path.

    Args:
        config_or_distro: User input (file path, distribution name, or built distribution)
        mode: Mode resolving for ("run", "build", "server")

    Returns:
        Path to the resolved config file
@ -47,7 +39,7 @@ def resolve_config_or_distro(

    # Strategy 2: Try as distribution name (if no .yaml extension)
    if not config_or_distro.endswith(".yaml"):
        distro_config = _get_distro_config_path(config_or_distro, mode)
        distro_config = _get_distro_config_path(config_or_distro)
        if distro_config.exists():
            logger.debug(f"Using distribution: {distro_config}")
            return distro_config
@ -63,34 +55,34 @@ def resolve_config_or_distro(
        return distro_config

    # Strategy 4: Try as built distribution name
    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
    if distrib_config.exists():
        logger.debug(f"Using built distribution: {distrib_config}")
        return distrib_config

    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / "config.yaml"
    if distrib_config.exists():
        logger.debug(f"Using built distribution: {distrib_config}")
        return distrib_config

    # Strategy 5: Failed - provide helpful error
    raise ValueError(_format_resolution_error(config_or_distro, mode))
    raise ValueError(_format_resolution_error(config_or_distro))


def _get_distro_config_path(distro_name: str, mode: str) -> Path:
def _get_distro_config_path(distro_name: str, path: str | None = None) -> Path:
    """Get the config file path for a distro."""
    if not mode.endswith(".yaml"):
    if not path or not path.endswith(".yaml"):
        mode = f"{mode}.yaml"
        path = "config.yaml"
    return DISTRO_DIR / distro_name / mode
    return DISTRO_DIR / distro_name / path


def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
def _format_resolution_error(config_or_distro: str) -> str:
    """Format a helpful error message for resolution failures."""
    from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR

    distro_path = _get_distro_config_path(config_or_distro, mode)
    distro_path = _get_distro_config_path(config_or_distro)
    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-config.yaml"

    available_distros = _get_available_distros()
    distros_str = ", ".join(available_distros) if available_distros else "none found"
@ -111,7 +103,7 @@ Did you mean one of these distributions?

def _get_available_distros() -> list[str]:
    """Get list of available distro names."""
    if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists():
    if not DISTRO_DIR.exists() or not DISTRIBS_BASE_DIR.exists():
        return []

    return list(
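With the `Mode` enum gone, one resolver serves every caller; a sketch of the behaviour that remains (the arguments are illustrative examples only):

```python
from llama_stack.core.utils.config_resolution import resolve_config_or_distro

# A .yaml argument is treated as a concrete config file path.
path = resolve_config_or_distro("/tmp/my-config.yaml")

# A bare name is tried as <distro>/config.yaml in the source tree, then as a
# built distribution under DISTRIBS_BASE_DIR, before a helpful error is raised.
path = resolve_config_or_distro("nvidia")
```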
@ -1,60 +0,0 @@
version: 2
distribution_spec:
  description: CI tests for Llama Stack
  providers:
    inference:
    - provider_type: remote::cerebras
    - provider_type: remote::ollama
    - provider_type: remote::vllm
    - provider_type: remote::tgi
    - provider_type: remote::fireworks
    - provider_type: remote::together
    - provider_type: remote::bedrock
    - provider_type: remote::nvidia
    - provider_type: remote::openai
    - provider_type: remote::anthropic
    - provider_type: remote::gemini
    - provider_type: remote::vertexai
    - provider_type: remote::groq
    - provider_type: remote::sambanova
    - provider_type: remote::azure
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
    - provider_type: inline::sqlite-vec
    - provider_type: inline::milvus
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    - provider_type: remote::qdrant
    - provider_type: remote::weaviate
    files:
    - provider_type: inline::localfs
    safety:
    - provider_type: inline::llama-guard
    - provider_type: inline::code-scanner
    agents:
    - provider_type: inline::meta-reference
    post_training:
    - provider_type: inline::torchtune-cpu
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
    batches:
    - provider_type: inline::reference
image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]
@ -1,33 +0,0 @@
version: 2
distribution_spec:
  description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
    container
  providers:
    inference:
    - provider_type: remote::tgi
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    safety:
    - provider_type: inline::llama-guard
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
        container_image=None,
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
            "config.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider, embedding_provider],
                    "vector_io": [chromadb_provider],
@ -141,14 +141,14 @@ docker run \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \
  -e INFERENCE_MODEL=$INFERENCE_MODEL \
  -e DEH_URL=$DEH_URL \
  -e SAFETY_MODEL=$SAFETY_MODEL \
  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
  -e CHROMA_URL=$CHROMA_URL \
  llamastack/distribution-{{ name }} \
  --config /root/my-run.yaml \
  --config /root/my-config.yaml \
  --port $LLAMA_STACK_PORT
```

@ -157,16 +157,16 @@ docker run \
You can also run the Docker container with a custom run configuration file by mounting it into the container:

```bash
# Set the path to your custom run.yaml file
# Set the path to your custom config.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml

docker run -it \
  --pull always \
  --network host \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
  -e INFERENCE_MODEL=$INFERENCE_MODEL \
  -e DEH_URL=$DEH_URL \
  -e CHROMA_URL=$CHROMA_URL \
@ -1,32 +0,0 @@
version: 2
distribution_spec:
  description: Use Meta Reference for running LLM inference
  providers:
    inference:
    - provider_type: inline::meta-reference
    vector_io:
    - provider_type: inline::faiss
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    safety:
    - provider_type: inline::llama-guard
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
@ -73,8 +73,8 @@ docker run \
You can also run the Docker container with a custom run configuration file by mounting it into the container:

```bash
# Set the path to your custom run.yaml file
# Set the path to your custom config.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
LLAMA_STACK_PORT=8321

docker run \
@ -83,8 +83,8 @@ docker run \
  --gpu all \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT
```
@ -105,7 +105,7 @@ Make sure you have the Llama Stack CLI available.
```bash
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/{{ name }}/run.yaml \
llama stack run distributions/{{ name }}/config.yaml \
  --port 8321
```
@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
            "config.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider, embedding_provider],
                    "vector_io": [vector_io_provider],
@ -1,29 +0,0 @@
version: 2
distribution_spec:
  description: Use NVIDIA NIM for running LLM inference, evaluation and safety
  providers:
    inference:
    - provider_type: remote::nvidia
    vector_io:
    - provider_type: inline::faiss
    safety:
    - provider_type: remote::nvidia
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: remote::nvidia
    post_training:
    - provider_type: remote::nvidia
    datasetio:
    - provider_type: inline::localfs
    - provider_type: remote::nvidia
    scoring:
    - provider_type: inline::basic
    tool_runtime:
    - provider_type: inline::rag-runtime
    files:
    - provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
@ -128,8 +128,8 @@ docker run \
You can also run the Docker container with a custom run configuration file by mounting it into the container:

```bash
# Set the path to your custom run.yaml file
# Set the path to your custom config.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml
LLAMA_STACK_PORT=8321

docker run \
@ -137,8 +137,8 @@ docker run \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
  -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \
  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
  -e RUN_CONFIG_PATH=/app/custom-config.yaml \
  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT
@ -162,7 +162,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
llama stack run ./config.yaml \
  --port 8321
```
@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
            "config.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "datasetio": [datasetio_provider],
@ -1,35 +0,0 @@
version: 2
distribution_spec:
  description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM
    inference with scalable cloud services
  providers:
    inference:
    - provider_type: remote::oci
    vector_io:
    - provider_type: inline::faiss
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    safety:
    - provider_type: inline::llama-guard
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
    files:
    - provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
@ -74,7 +74,7 @@ def get_distribution_template(name: str = "oci") -> DistributionTemplate:
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
            "config.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "vector_io": [vector_io_provider],
@ -1,36 +0,0 @@
version: 2
distribution_spec:
  description: Distribution for running open benchmarks
  providers:
    inference:
    - provider_type: remote::openai
    - provider_type: remote::anthropic
    - provider_type: remote::gemini
    - provider_type: remote::groq
    - provider_type: remote::together
    vector_io:
    - provider_type: inline::sqlite-vec
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    safety:
    - provider_type: inline::llama-guard
    agents:
    - provider_type: inline::meta-reference
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
@ -261,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate:
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
            "config.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": inference_providers,
                    "vector_io": vector_io_providers,
@ -1,61 +0,0 @@
version: 2
distribution_spec:
  description: Quick start template for running Llama Stack with several popular providers.
    This distribution is intended for GPU-enabled environments.
  providers:
    inference:
    - provider_type: remote::cerebras
    - provider_type: remote::ollama
    - provider_type: remote::vllm
    - provider_type: remote::tgi
    - provider_type: remote::fireworks
    - provider_type: remote::together
    - provider_type: remote::bedrock
    - provider_type: remote::nvidia
    - provider_type: remote::openai
    - provider_type: remote::anthropic
    - provider_type: remote::gemini
    - provider_type: remote::vertexai
    - provider_type: remote::groq
    - provider_type: remote::sambanova
    - provider_type: remote::azure
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
    - provider_type: inline::sqlite-vec
    - provider_type: inline::milvus
    - provider_type: remote::chromadb
    - provider_type: remote::pgvector
    - provider_type: remote::qdrant
    - provider_type: remote::weaviate
    files:
    - provider_type: inline::localfs
    safety:
    - provider_type: inline::llama-guard
    - provider_type: inline::code-scanner
    agents:
    - provider_type: inline::meta-reference
    post_training:
    - provider_type: inline::huggingface-gpu
    eval:
    - provider_type: inline::meta-reference
    datasetio:
    - provider_type: remote::huggingface
    - provider_type: inline::localfs
    scoring:
    - provider_type: inline::basic
    - provider_type: inline::llm-as-judge
    - provider_type: inline::braintrust
    tool_runtime:
    - provider_type: remote::brave-search
    - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
    - provider_type: remote::model-context-protocol
    batches:
    - provider_type: inline::reference
image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]
@@ -1,61 +0,0 @@
-version: 2
-distribution_spec:
-  description: Quick start template for running Llama Stack with several popular providers.
-    This distribution is intended for CPU-only environments.
-  providers:
-    inference:
-    - provider_type: remote::cerebras
-    - provider_type: remote::ollama
-    - provider_type: remote::vllm
-    - provider_type: remote::tgi
-    - provider_type: remote::fireworks
-    - provider_type: remote::together
-    - provider_type: remote::bedrock
-    - provider_type: remote::nvidia
-    - provider_type: remote::openai
-    - provider_type: remote::anthropic
-    - provider_type: remote::gemini
-    - provider_type: remote::vertexai
-    - provider_type: remote::groq
-    - provider_type: remote::sambanova
-    - provider_type: remote::azure
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: inline::faiss
-    - provider_type: inline::sqlite-vec
-    - provider_type: inline::milvus
-    - provider_type: remote::chromadb
-    - provider_type: remote::pgvector
-    - provider_type: remote::qdrant
-    - provider_type: remote::weaviate
-    files:
-    - provider_type: inline::localfs
-    safety:
-    - provider_type: inline::llama-guard
-    - provider_type: inline::code-scanner
-    agents:
-    - provider_type: inline::meta-reference
-    post_training:
-    - provider_type: inline::torchtune-cpu
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-    batches:
-    - provider_type: inline::reference
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- asyncpg
-- psycopg2-binary
-- sqlalchemy[asyncio]
@@ -275,9 +275,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
         run_configs={
-            "run.yaml": base_run_settings,
+            "config.yaml": base_run_settings,
             "run-with-postgres-store.yaml": postgres_run_settings,
         },
         run_config_env_vars={
@@ -16,10 +16,8 @@ from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     Api,
     BenchmarkInput,
-    BuildConfig,
     BuildProvider,
     DatasetInput,
-    DistributionSpec,
     ModelInput,
     Provider,
     SafetyConfig,
@@ -35,11 +33,8 @@ from llama_stack.core.storage.datatypes import (
     StorageBackendType,
 )
 from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
-from llama_stack.core.storage.kvstore.config import get_pip_packages as get_kv_pip_packages
 from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
-from llama_stack.core.storage.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
 from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack_api import DatasetPurpose, ModelType
@@ -319,55 +314,6 @@ class DistributionTemplate(BaseModel):

     available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None

-    # we may want to specify additional pip packages without necessarily indicating a
-    # specific "default" inference store (which is what typically used to dictate additional
-    # pip packages)
-    additional_pip_packages: list[str] | None = None
-
-    def build_config(self) -> BuildConfig:
-        additional_pip_packages: list[str] = []
-        for run_config in self.run_configs.values():
-            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
-
-            # TODO: This is a hack to get the dependencies for internal APIs into build
-            # We should have a better way to do this by formalizing the concept of "internal" APIs
-            # and providers, with a way to specify dependencies for them.
-            storage_cfg = run_config_.get("storage", {})
-            for backend_cfg in storage_cfg.get("backends", {}).values():
-                store_type = backend_cfg.get("type")
-                if not store_type:
-                    continue
-                if str(store_type).startswith("kv_"):
-                    additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
-                elif str(store_type).startswith("sql_"):
-                    additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
-
-        if self.additional_pip_packages:
-            additional_pip_packages.extend(self.additional_pip_packages)
-
-        # Create minimal providers for build config (without runtime configs)
-        build_providers = {}
-        for api, providers in self.providers.items():
-            build_providers[api] = []
-            for provider in providers:
-                # Create a minimal build provider object with only essential build information
-                build_provider = BuildProvider(
-                    provider_type=provider.provider_type,
-                    module=provider.module,
-                )
-                build_providers[api].append(build_provider)
-
-        return BuildConfig(
-            distribution_spec=DistributionSpec(
-                description=self.description,
-                container_image=self.container_image,
-                providers=build_providers,
-            ),
-            image_type=LlamaStackImageType.VENV.value,  # default to venv
-            additional_pip_packages=sorted(set(additional_pip_packages)),
-        )
-
     def generate_markdown_docs(self) -> str:
         providers_table = "| API | Provider(s) |\n"
         providers_table += "|-----|-------------|\n"
@@ -439,14 +385,6 @@ class DistributionTemplate(BaseModel):
         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)

-        build_config = self.build_config()
-        with open(yaml_output_dir / "build.yaml", "w") as f:
-            yaml.safe_dump(
-                filter_empty_values(build_config.model_dump(exclude_none=True)),
-                f,
-                sort_keys=False,
-            )
-
         for yaml_pth, settings in self.run_configs.items():
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
@@ -1,33 +0,0 @@
-version: 2
-distribution_spec:
-  description: Use watsonx for running LLM inference
-  providers:
-    inference:
-    - provider_type: remote::watsonx
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: inline::faiss
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-    files:
-    - provider_type: inline::localfs
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]
@@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         template_path=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "files": [files_provider],
@@ -91,10 +91,10 @@ def config_to_category_levels(category: str, level: str):

 def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
     """
-    Helper function to parse a yaml logging configuration found in the run.yaml
+    Helper function to parse a yaml logging configuration found in the config.yaml

     Parameters:
-        yaml_config (Logging): the logger config object found in the run.yaml
+        yaml_config (Logging): the logger config object found in the config.yaml

     Returns:
         Dict[str, int]: A dictionary mapping categories to their log levels.
@@ -16,7 +16,7 @@ class MetaReferenceInferenceConfig(BaseModel):
     # this is a placeholder to indicate inference model id
     # the actual inference model id is dtermined by the moddel id in the request
     # Note: you need to register the model before using it for inference
-    # models in the resouce list in the run.yaml config will be registered automatically
+    # models in the resouce list in the config.yaml config will be registered automatically
     model: str | None = None
     torch_seed: int | None = None
     max_seq_len: int = 4096
@@ -51,7 +51,9 @@ class _HfAdapter(OpenAIMixin):
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
         if not config.base_url:
-            raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
+            raise ValueError(
+                "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI."
+            )
         log.info(f"Initializing TGI client with url={config.base_url}")
         # Extract base URL without /v1 for HF client initialization
         base_url_str = str(config.base_url).rstrip("/")
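As a rough illustration of the guard this hunk rewraps, the sketch below re-implements the same check outside the adapter. Only the `base_url` field, the `TGI_URL` variable, and the error text come from the diff; the helper function itself is hypothetical.

```python
import os


def resolve_tgi_base_url(config_base_url: str | None) -> str:
    # Hypothetical helper: accept either an explicit config value or the TGI_URL
    # environment variable named in the error message above.
    base_url = config_base_url or os.environ.get("TGI_URL")
    if not base_url:
        raise ValueError(
            "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI."
        )
    # The adapter strips a trailing slash before building its HF client; do the same here.
    return base_url.rstrip("/")
```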
@@ -46,7 +46,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
     async def initialize(self) -> None:
         if not self.config.base_url:
             raise ValueError(
-                "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
+                "You must provide a URL in config.yaml (or via the VLLM_URL environment variable) to use vLLM."
             )

     async def health(self) -> HealthResponse:
@@ -35,7 +35,7 @@ For running integration tests, you must provide a few things:
 - **`server:<config>`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
 - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:starter:8322`)
 - a URL which points to a Llama Stack distribution server
-- a distribution name (e.g., `starter`) or a path to a `run.yaml` file
+- a distribution name (e.g., `starter`) or a path to a `config.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.

 - Any API keys you need to use should be set in the environment, or can be passed in with the --env option.
@@ -5,10 +5,10 @@
 # the root directory of this source tree.

 """
-Backward compatibility test for run.yaml files.
+Backward compatibility test for config.yaml files.

 This test ensures that changes to StackRunConfig don't break
-existing run.yaml files from previous versions.
+existing config.yaml files from previous versions.
 """

 import os
@@ -17,7 +17,7 @@ from pathlib import Path
 import pytest
 import yaml

-from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.datatypes import StackConfig


 def get_test_configs():
@@ -36,10 +36,10 @@ def get_test_configs():
     else:
         # Local mode: test current distribution configs
         repo_root = Path(__file__).parent.parent.parent
-        config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/run.yaml"))
+        config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/config.yaml"))

         if not config_files:
-            pytest.skip("No run.yaml files found in distributions/")
+            pytest.skip("No config.yaml files found in distributions/")

         return config_files
@@ -49,4 +49,4 @@ def test_load_run_config(config_file):
     with open(config_file) as f:
         config_data = yaml.safe_load(f)

-    StackRunConfig.model_validate(config_data)
+    StackConfig.model_validate(config_data)
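A minimal, self-contained sketch of what the backward-compatibility test above exercises, runnable from the repo root. The `src/llama_stack/distributions` layout is taken from the workflow; everything else follows the test verbatim.

```python
from pathlib import Path

import yaml

from llama_stack.core.datatypes import StackConfig

# Validate every shipped distribution config the same way the test does.
for config_file in sorted(Path("src/llama_stack/distributions").glob("*/config.yaml")):
    with open(config_file) as f:
        StackConfig.model_validate(yaml.safe_load(f))
    print(f"{config_file}: OK")
```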
10 tests/external/build.yaml vendored
@@ -1,10 +0,0 @@
-version: '2'
-distribution_spec:
-  description: Custom distro for CI tests
-  providers:
-    weather:
-    - provider_type: remote::kaze
-image_type: venv
-image_name: ci-test
-external_providers_dir: ~/.llama/providers.d
-external_apis_dir: ~/.llama/apis.d
13 tests/external/ramalama-stack/build.yaml vendored
@@ -1,13 +0,0 @@
-version: 2
-distribution_spec:
-  description: Use (an external) Ramalama server for running LLM inference
-  container_image: null
-  providers:
-    inference:
-    - provider_type: remote::ramalama
-      module: ramalama_stack==0.3.0a0
-image_type: venv
-image_name: ramalama-stack-test
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]
@@ -25,7 +25,7 @@ Here are the most important options:
 - **`server:<config>`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
 - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:starter:8322`)
 - a URL which points to a Llama Stack distribution server
-- a distribution name (e.g., `starter`) or a path to a `run.yaml` file
+- a distribution name (e.g., `starter`) or a path to a `config.yaml` file
 - a comma-separated list of api=provider pairs, e.g. `inference=ollama,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
 - `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers.
@@ -151,7 +151,7 @@ def pytest_addoption(parser):
         """
         a 'pointer' to the stack. this can be either be:
         (a) a template name like `starter`, or
-        (b) a path to a run.yaml file, or
+        (b) a path to a config.yaml file, or
         (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
         (d) a server config like `server:ci-tests`, or
         (e) a docker config like `docker:ci-tests` (builds and runs container)
@@ -6,7 +6,7 @@

 import yaml

-from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.storage.datatypes import (
     PostgresKVStoreConfig,
     PostgresSqlStoreConfig,
@@ -17,10 +17,10 @@ from llama_stack.core.storage.datatypes import (

 def test_starter_distribution_config_loads_and_resolves():
     """Integration: Actual starter config should parse and have correct storage structure."""
-    with open("llama_stack/distributions/starter/run.yaml") as f:
+    with open("llama_stack/distributions/starter/config.yaml") as f:
         config_dict = yaml.safe_load(f)

-    config = StackRunConfig(**config_dict)
+    config = StackConfig(**config_dict)

     # Config should have named backends and explicit store references
     assert config.storage is not None
@@ -47,10 +47,10 @@ def test_starter_distribution_config_loads_and_resolves():

 def test_postgres_demo_distribution_config_loads():
     """Integration: Postgres demo should use Postgres backend for all stores."""
-    with open("llama_stack/distributions/postgres-demo/run.yaml") as f:
+    with open("llama_stack/distributions/postgres-demo/config.yaml") as f:
         config_dict = yaml.safe_load(f)

-    config = StackRunConfig(**config_dict)
+    config = StackConfig(**config_dict)

     # Should have postgres backend
     assert config.storage is not None
@@ -299,7 +299,7 @@ def test_providers_flag_generates_config_with_api_keys():
     # Read the generated config file
     from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR

-    config_file = DISTRIBS_BASE_DIR / "providers-run" / "run.yaml"
+    config_file = DISTRIBS_BASE_DIR / "providers-run" / "config.yaml"
     with open(config_file) as f:
         config_dict = yaml.safe_load(f)
@@ -16,7 +16,7 @@ from llama_stack.core.conversations.conversations import (
     ConversationServiceConfig,
     ConversationServiceImpl,
 )
-from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.storage.datatypes import (
     ServerStoresConfig,
     SqliteSqlStoreConfig,
@@ -44,9 +44,9 @@ async def service():
         ),
     )
     register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
-    run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
+    stack_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage)

-    config = ConversationServiceConfig(run_config=run_config, policy=[])
+    config = ConversationServiceConfig(config=stack_config, policy=[])
     service = ConversationServiceImpl(config, {})
     await service.initialize()
     yield service
@@ -151,9 +151,9 @@ async def test_policy_configuration():
         ),
     )
     register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
-    run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
+    stack_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage)

-    config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy)
+    config = ConversationServiceConfig(config=stack_config, policy=restrictive_policy)
     service = ConversationServiceImpl(config, {})
     await service.initialize()
@@ -10,7 +10,7 @@ from unittest.mock import AsyncMock

 import pytest

-from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig
+from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
 from llama_stack.core.storage.datatypes import ServerStoresConfig, StorageConfig
 from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield
@@ -19,7 +19,7 @@ from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model,
 class TestVectorStoresValidation:
     async def test_validate_missing_model(self):
         """Test validation fails when model not found."""
-        run_config = StackRunConfig(
+        run_config = StackConfig(
             image_name="test",
             providers={},
             storage=StorageConfig(
@@ -47,7 +47,7 @@ class TestVectorStoresValidation:

     async def test_validate_success(self):
         """Test validation passes with valid model."""
-        run_config = StackRunConfig(
+        run_config = StackConfig(
             image_name="test",
             providers={},
             storage=StorageConfig(
@@ -11,7 +11,7 @@ from pydantic import ValidationError

 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
-    StackRunConfig,
+    StackConfig,
 )
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
@@ -51,7 +51,7 @@ def _base_run_config(**overrides):
             ),
         ),
     )
-    return StackRunConfig(
+    return StackConfig(
         version=LLAMA_STACK_RUN_CONFIG_VERSION,
         image_name="test-distro",
         apis=[],
@@ -11,7 +11,7 @@ import pytest
 import yaml
 from pydantic import BaseModel, Field, ValidationError

-from llama_stack.core.datatypes import Api, Provider, StackRunConfig
+from llama_stack.core.datatypes import Api, Provider, StackConfig
 from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
@@ -53,7 +53,7 @@ def _default_storage() -> StorageConfig:
     )


-def make_stack_config(**overrides) -> StackRunConfig:
+def make_stack_config(**overrides) -> StackConfig:
     storage = overrides.pop("storage", _default_storage())
     defaults = dict(
         image_name="test_image",
@@ -62,7 +62,7 @@ def make_stack_config(**overrides) -> StackRunConfig:
         storage=storage,
     )
     defaults.update(overrides)
-    return StackRunConfig(**defaults)
+    return StackConfig(**defaults)


 @pytest.fixture
@@ -270,7 +270,7 @@ class TestProviderRegistry:
             external_providers_dir="/nonexistent/dir",
         )
         with pytest.raises(FileNotFoundError):
-            get_provider_registry(config)
+            get_provider_registry(config=config)

     def test_empty_api_directory(self, api_directories, mock_providers, base_config):
         """Test handling of empty API directory."""
@@ -339,7 +339,7 @@ pip_packages:
                 ]
             },
         )
-        registry = get_provider_registry(config)
+        registry = get_provider_registry(config=config)
         assert Api.inference in registry
         assert "external_test" in registry[Api.inference]
         provider = registry[Api.inference]["external_test"]
@@ -368,7 +368,7 @@ pip_packages:
             },
         )
         with pytest.raises(ValueError) as exc_info:
-            get_provider_registry(config)
+            get_provider_registry(config=config)
         assert "get_provider_spec not found" in str(exc_info.value)

     def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers):
@@ -391,31 +391,29 @@ pip_packages:
             },
         )
         with pytest.raises(AttributeError):
-            get_provider_registry(config)
+            get_provider_registry(config=config)

-    def test_external_provider_from_module_building(self, mock_providers):
-        """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+    def test_external_provider_from_module_listing(self, mock_providers):
+        """Test loading an external provider from a module during list-deps (listing=True, partial spec)."""
+        from llama_stack.core.datatypes import StackConfig
         from llama_stack_api import Api

-        # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
-        build_config = BuildConfig(
-            version=2,
-            image_type="container",
-            image_name="test_image",
-            distribution_spec=DistributionSpec(
-                description="test",
-                providers={
-                    "inference": [
-                        BuildProvider(
-                            provider_type="external_test",
-                            module="external_test",
-                        )
-                    ]
-                },
-            ),
-        )
-        registry = get_provider_registry(build_config)
+        # No importlib patch needed, should not import module when listing
+        config = StackConfig(
+            image_name="test_image",
+            apis=[],
+            providers={
+                "inference": [
+                    Provider(
+                        provider_id="external_test",
+                        provider_type="external_test",
+                        config={},
+                        module="external_test",
+                    )
+                ]
+            },
+        )
+        registry = get_provider_registry(config=config, listing=True)
         assert Api.inference in registry
         assert "external_test" in registry[Api.inference]
         provider = registry[Api.inference]["external_test"]
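A hedged sketch of the listing path the renamed test covers: with `listing=True` the registry is built from provider metadata alone, so the external module never has to be importable. The classes, import paths, and call signature come from the hunk above; the field values are illustrative.

```python
from llama_stack.core.datatypes import Provider, StackConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack_api import Api

config = StackConfig(
    image_name="example",
    apis=[],
    providers={
        "inference": [
            Provider(
                provider_id="external_test",
                provider_type="external_test",
                config={},
                module="external_test==1.0.0",
            )
        ]
    },
)

# listing=True: no importlib.import_module call, only a partial spec is returned.
registry = get_provider_registry(config=config, listing=True)
print(Api.inference in registry)
```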
@@ -448,7 +446,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)
         # Should not add anything to registry
         assert len(result[Api.inference]) == 0
@@ -487,36 +485,34 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)
         assert "versioned_test" in result[Api.inference]
         assert result[Api.inference]["versioned_test"].module == "versioned_test==1.0.0"

     def test_buildconfig_does_not_import_module(self, mock_providers):
-        """Test that BuildConfig does not import the module (building=True)."""
-        from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+        """Test that StackConfig does not import the module when listing (listing=True)."""
+        from llama_stack.core.datatypes import StackConfig
         from llama_stack.core.distribution import get_external_providers_from_module

-        build_config = BuildConfig(
-            version=2,
-            image_type="container",
-            image_name="test_image",
-            distribution_spec=DistributionSpec(
-                description="test",
-                providers={
-                    "inference": [
-                        BuildProvider(
-                            provider_type="build_test",
-                            module="build_test==1.0.0",
-                        )
-                    ]
-                },
-            ),
-        )
+        config = StackConfig(
+            image_name="test_image",
+            apis=[],
+            providers={
+                "inference": [
+                    Provider(
+                        provider_id="build_test",
+                        provider_type="build_test",
+                        config={},
+                        module="build_test==1.0.0",
+                    )
+                ]
+            },
+        )

-        # Should not call import_module at all when building
+        # Should not call import_module at all when listing
         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, build_config, building=True)
+            result = get_external_providers_from_module(registry, config, listing=True)

             # Verify module was NOT imported
             mock_import.assert_not_called()
@@ -530,35 +526,31 @@ class TestGetExternalProvidersFromModule:
             assert provider.api == Api.inference

     def test_buildconfig_multiple_providers(self, mock_providers):
-        """Test BuildConfig with multiple providers for the same API."""
-        from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+        """Test StackConfig with multiple providers for the same API."""
+        from llama_stack.core.datatypes import StackConfig
         from llama_stack.core.distribution import get_external_providers_from_module

-        build_config = BuildConfig(
-            version=2,
-            image_type="container",
-            image_name="test_image",
-            distribution_spec=DistributionSpec(
-                description="test",
-                providers={
-                    "inference": [
-                        BuildProvider(provider_type="provider1", module="provider1"),
-                        BuildProvider(provider_type="provider2", module="provider2"),
-                    ]
-                },
-            ),
-        )
+        config = StackConfig(
+            image_name="test_image",
+            apis=[],
+            providers={
+                "inference": [
+                    Provider(provider_id="provider1", provider_type="provider1", config={}, module="provider1"),
+                    Provider(provider_id="provider2", provider_type="provider2", config={}, module="provider2"),
+                ]
+            },
+        )

         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, build_config, building=True)
+            result = get_external_providers_from_module(registry, config, listing=True)

             mock_import.assert_not_called()
             assert "provider1" in result[Api.inference]
             assert "provider2" in result[Api.inference]

     def test_distributionspec_does_not_import_module(self, mock_providers):
-        """Test that DistributionSpec does not import the module (building=True)."""
+        """Test that DistributionSpec does not import the module (listing=True)."""
         from llama_stack.core.datatypes import BuildProvider, DistributionSpec
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -574,10 +566,10 @@ class TestGetExternalProvidersFromModule:
             },
         )

-        # Should not call import_module at all when building
+        # Should not call import_module at all when listing
         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, dist_spec, building=True)
+            result = get_external_providers_from_module(registry, dist_spec, listing=True)

             # Verify module was NOT imported
             mock_import.assert_not_called()
@@ -631,7 +623,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Only the matching provider_type should be added
         assert "list_test" in result[Api.inference]
@@ -679,7 +671,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Only the matching provider_type should be added
         assert "wanted" in result[Api.inference]
@@ -734,7 +726,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Both provider types should be added to registry
         assert "remote::ollama" in result[Api.inference]
@@ -768,7 +760,7 @@ class TestGetExternalProvidersFromModule:
         registry = {Api.inference: {}}

         with pytest.raises(ValueError) as exc_info:
-            get_external_providers_from_module(registry, config, building=False)
+            get_external_providers_from_module(registry, config, listing=False)

         assert "get_provider_spec not found" in str(exc_info.value)
@@ -805,7 +797,7 @@ class TestGetExternalProvidersFromModule:
         registry = {Api.inference: {}}

         with pytest.raises(RuntimeError) as exc_info:
-            get_external_providers_from_module(registry, config, building=False)
+            get_external_providers_from_module(registry, config, listing=False)

         assert "Something went wrong" in str(exc_info.value)
@@ -818,7 +810,7 @@ class TestGetExternalProvidersFromModule:
             providers={},
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Should return registry unchanged
         assert result == registry
@@ -874,7 +866,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}, Api.safety: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         assert "inf_test" in result[Api.inference]
         assert "safe_test" in result[Api.safety]
@@ -31,8 +31,7 @@ def mock_distribs_base_dir(tmp_path):
     # Create a custom distribution
     starter_custom = custom_dir / "starter"
     starter_custom.mkdir()
-    (starter_custom / "starter-build.yaml").write_text("# build config")
-    (starter_custom / "starter-run.yaml").write_text("# run config")
+    (starter_custom / "starter-config.yaml").write_text("# config")

     return custom_dir
@@ -47,8 +46,7 @@ def mock_distro_dir(tmp_path):
     for distro_name in ["starter", "nvidia", "dell"]:
         distro_path = distro_dir / distro_name
         distro_path.mkdir()
-        (distro_path / "build.yaml").write_text("# build config")
-        (distro_path / "run.yaml").write_text("# run config")
+        (distro_path / "config.yaml").write_text("# config")

     return distro_dir
@@ -112,7 +110,7 @@ class TestStackList:
         # Add a hidden directory
         hidden_dir = mock_distro_dir / ".hidden"
         hidden_dir.mkdir()
-        (hidden_dir / "build.yaml").write_text("# build")
+        (hidden_dir / "config.yaml").write_text("# config")

         # Add a __pycache__ directory
         pycache_dir = mock_distro_dir / "__pycache__"
@@ -27,7 +27,7 @@ async def temp_prompt_store(tmp_path_factory):
     temp_dir = tmp_path_factory.getbasetemp()
     db_path = str(temp_dir / f"{unique_id}.db")

-    from llama_stack.core.datatypes import StackRunConfig
+    from llama_stack.core.datatypes import StackConfig

     storage = StorageConfig(
         backends={
@@ -41,13 +41,13 @@ async def temp_prompt_store(tmp_path_factory):
             prompts=KVStoreReference(backend="kv_test", namespace="prompts"),
         ),
     )
-    mock_run_config = StackRunConfig(
+    mock_run_config = StackConfig(
         image_name="test-distribution",
         apis=[],
         providers={},
         storage=storage,
     )
-    config = PromptServiceConfig(run_config=mock_run_config)
+    config = PromptServiceConfig(config=mock_run_config)
     store = PromptServiceImpl(config, deps={})

     register_kvstore_backends({"kv_test": storage.backends["kv_test"]})