Merge branch 'main' into openai-vector-store/qdrant

commit 970d0f307f
ehhuang authored on 2025-07-31 15:49:49 -07:00 (committed via GitHub)
338 changed files with 15301 additions and 15997 deletions


@ -0,0 +1,198 @@
name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'
inputs:
test-types:
description: 'JSON array of test types to run'
required: true
stack-config:
description: 'Stack configuration to use'
required: true
provider:
description: 'Provider to use for tests'
required: true
inference-mode:
description: 'Inference mode (record or replay)'
required: true
run-vision-tests:
description: 'Whether to run vision tests'
required: false
default: 'false'
runs:
using: 'composite'
steps:
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
shell: bash
run: |
free -h
df -h
- name: Set environment variables
shell: bash
run: |
echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV
echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV
echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV
echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV
else
echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV
fi
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV
else
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV
fi
- name: Run Llama Stack Server
if: ${{ contains(inputs.stack-config, 'server:') }}
shell: bash
run: |
# Start the server once up front so the pytest runs below don't repeatedly start and stop it
echo "Starting Llama Stack Server"
nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &
echo "Waiting for Llama Stack Server to start"
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo "Llama Stack Server started"
exit 0
fi
sleep 1
done
echo "Llama Stack Server failed to start"
cat server.log
exit 1
- name: Run Integration Tests
shell: bash
run: |
stack_config="${{ inputs.stack-config }}"
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
EXTRA_PARAMS="--safety-shield=llama-guard"
else
EXTRA_PARAMS=""
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--vision-model=ollama/llama3.2-vision:11b \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
echo "✅ Tests completed for vision"
else
echo "❌ Tests failed for vision"
exit 1
fi
exit 0
fi
# Run non-vision tests
TEST_TYPES='${{ inputs.test-types }}'
echo "Test types to run: $TEST_TYPES"
# Collect all test files for the specified test types
TEST_FILES=""
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
if [ "${{ inputs.provider }}" == "vllm" ]; then
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
echo "Skipping $test_type for vllm provider"
continue
fi
fi
if [ -d "tests/integration/$test_type" ]; then
# Find all Python test files in this directory
test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
if [ -n "$test_files" ]; then
TEST_FILES="$TEST_FILES $test_files"
echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
fi
else
echo "Warning: Directory tests/integration/$test_type does not exist"
fi
done
if [ -z "$TEST_FILES" ]; then
echo "No test files found for the specified test types"
exit 1
fi
echo "=== Running all collected tests in a single pytest command ==="
echo "Total test files: $(echo $TEST_FILES | wc -w)"
if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--text-model=$TEXT_MODEL \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then
echo "✅ All tests completed successfully"
else
echo "❌ Tests failed"
exit 1
fi
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
shell: bash
run: |
free -h
df -h
- name: Commit and push recordings
if: ${{ inputs.inference-mode == 'record' }}
shell: bash
run: |
echo "Checking for recording changes"
git status --porcelain tests/integration/recordings/
if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
git commit -m "Recordings update from CI (vision)"
else
git commit -m "Recordings update from CI"
fi
git fetch origin ${{ github.event.pull_request.head.ref }}
git rebase origin/${{ github.event.pull_request.head.ref }}
echo "Rebased successfully"
git push origin HEAD:${{ github.event.pull_request.head.ref }}
echo "Pushed successfully"
else
echo "No recording changes"
fi
- name: Write inference logs to file
if: ${{ always() }}
shell: bash
run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
- name: Upload logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
path: |
*.log
retention-days: 1


@ -1,73 +0,0 @@
name: 'Run Integration Tests'
description: 'Run integration tests with configurable execution mode and provider settings'
inputs:
test-types:
description: 'Test types to run (JSON array)'
required: true
stack-config:
description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
required: true
provider:
description: 'Provider to use: "ollama" or "vllm"'
required: true
inference-mode:
description: 'Inference mode: "record" or "replay"'
required: true
outputs:
logs-path:
description: 'Path to generated log files'
value: '*.log'
runs:
using: 'composite'
steps:
- name: Run Integration Tests
env:
LLAMA_STACK_CLIENT_TIMEOUT: "300"
LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
shell: bash
run: |
stack_config="${{ inputs.stack-config }}"
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
export OLLAMA_URL="http://0.0.0.0:11434"
export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
export SAFETY_MODEL="ollama/llama-guard3:1b"
EXTRA_PARAMS="--safety-shield=llama-guard"
else
export VLLM_URL="http://localhost:8000/v1"
export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
EXTRA_PARAMS=""
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
TEST_TYPES='${{ inputs.test-types }}'
echo "Test types to run: $TEST_TYPES"
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
if [ "${{ inputs.provider }}" == "vllm" ]; then
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
continue
fi
fi
echo "=== Running tests for: $test_type ==="
if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--text-model=$TEXT_MODEL \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
echo "✅ Tests completed for $test_type"
else
echo "❌ Tests failed for $test_type"
exit 1
fi
done


@ -1,11 +1,23 @@
name: Setup Ollama
description: Start Ollama
inputs:
run-vision-tests:
description: 'Run vision tests: "true" or "false"'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Start Ollama
shell: bash
run: |
docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
image="ollama-with-vision-model"
else
image="ollama-with-models"
fi
echo "Starting Ollama with image: $image"
docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
echo "Verifying Ollama status..."
timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'


@ -0,0 +1,51 @@
name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'
inputs:
python-version:
description: 'Python version to use'
required: true
client-version:
description: 'Client version (latest or published)'
required: true
provider:
description: 'Provider to setup (ollama or vllm)'
required: true
default: 'ollama'
run-vision-tests:
description: 'Whether to setup provider for vision tests'
required: false
default: 'false'
inference-mode:
description: 'Inference mode (record or replay)'
required: true
runs:
using: 'composite'
steps:
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: ${{ inputs.python-version }}
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
run-vision-tests: ${{ inputs.run-vision-tests }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
shell: bash
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Configure git for commits
shell: bash
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"


@ -1,6 +1,6 @@
# Llama Stack CI
Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
| Name | File | Purpose |
| ---- | ---- | ------- |
@ -8,7 +8,7 @@ Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |


@ -1,22 +1,22 @@
name: Integration Tests
name: Integration Tests (Replay)
run-name: Run the integration test suite from tests/integration
run-name: Run the integration test suite from tests/integration in replay mode
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
types: [opened, synchronize, labeled]
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- '.github/workflows/integration-tests.yml' # This workflow
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/run-integration-tests/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@ -33,31 +33,15 @@ on:
default: 'ollama'
concurrency:
# This creates three concurrency groups:
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for valid triggers with re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-replay (for valid triggers without re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-no-run (for invalid triggers that will be skipped)
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
group: >-
${{ github.workflow }}-${{ github.ref }}-${{
(github.event.action == 'opened' ||
github.event.action == 'synchronize' ||
(github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))) &&
(contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay') ||
'no-run'
}}
# Skip concurrency for pushes to main - each commit should be tested independently
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
cancel-in-progress: true
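# Illustration (annotation, not part of the workflow): a pull-request run lands in a group such as
# "Integration Tests (Replay)-refs/pull/<PR number>/merge", so a newer push cancels the in-flight run,
# while every push to main gets a unique run_id-based group and is never cancelled by a later commit.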
jobs:
discover-tests:
if: |
github.event.action == 'opened' ||
github.event.action == 'synchronize' ||
(github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
steps:
- name: Checkout repository
@ -67,94 +51,13 @@ jobs:
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
- name: Check if re-record-tests label exists
id: check-rerecord-tests
run: |
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
else
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
fi
record-tests:
# Sequential job for recording to avoid SQLite conflicts
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
- name: Setup ollama
if: ${{ inputs.test-provider == 'ollama' }}
uses: ./.github/actions/setup-ollama
- name: Setup vllm
if: ${{ inputs.test-provider == 'vllm' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Configure git for commits
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
- name: Run Integration Tests for All Types (Recording Mode)
uses: ./.github/actions/run-integration-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider }}
inference-mode: 'record'
- name: Commit and push recordings
run: |
if ! git diff --quiet tests/integration/recordings/; then
echo "Committing recordings"
git add tests/integration/recordings/
git commit -m "Update recordings"
echo "Pushing all recording commits to PR"
git push origin HEAD:${{ github.head_ref }}
else
echo "No recording changes"
fi
- name: Write inference logs to file
if: ${{ always() }}
run: |
sudo docker logs ollama > ollama-recording.log || true
- name: Upload recording logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: recording-logs-${{ github.run_id }}
path: |
*.log
retention-days: 1
run-tests:
# Skip this job if we're in recording mode (handled by record-tests job)
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
run-replay-mode-tests:
needs: discover-tests
runs-on: ubuntu-latest
@ -164,48 +67,29 @@ jobs:
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
python-version: ["3.12", "3.13"]
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: ['true', 'false']
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay'
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Run Integration Tests (Replay Mode)
uses: ./.github/actions/run-integration-tests
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Upload test logs on failure
if: ${{ failure() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
path: |
*.log
retention-days: 1
run-vision-tests: ${{ matrix.run-vision-tests }}


@ -9,8 +9,8 @@ on:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
@ -19,8 +19,8 @@ on:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
@ -108,7 +108,7 @@ jobs:
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi
@ -142,7 +142,7 @@ jobs:
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi


@ -12,12 +12,13 @@ on:
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- 'tests/external/*'
- '.github/workflows/test-external-provider-module.yml' # This workflow
jobs:
test-external-providers-from-module:
# This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
if: false
runs-on: ubuntu-latest
strategy:
matrix:
@ -47,7 +48,7 @@ jobs:
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'


@ -43,11 +43,11 @@ jobs:
- name: Print distro dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'


@ -1,7 +1,7 @@
include pyproject.toml
include llama_stack/models/llama/llama3/tokenizer.model
include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/distribution/*.sh
include llama_stack.core/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/templates/*/*.yaml
include llama_stack/providers/tests/test_cases/inference/*.json


@ -6,7 +6,6 @@
[![Discord](https://img.shields.io/discord/1257833999603335178?color=6A7EC2&logo=discord&logoColor=ffffff)](https://discord.gg/llama-stack)
[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
![coverage badge](./coverage.svg)
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)


@ -15078,22 +15078,6 @@
"DPOAlignmentConfig": {
"type": "object",
"properties": {
"reward_scale": {
"type": "number",
"description": "Scaling factor for the reward signal"
},
"reward_clip": {
"type": "number",
"description": "Maximum absolute value for reward clipping"
},
"epsilon": {
"type": "number",
"description": "Small value added for numerical stability"
},
"gamma": {
"type": "number",
"description": "Discount factor for future rewards"
},
"beta": {
"type": "number",
"description": "Temperature parameter for the DPO loss"
@ -15106,10 +15090,6 @@
},
"additionalProperties": false,
"required": [
"reward_scale",
"reward_clip",
"epsilon",
"gamma",
"beta",
"loss_type"
],


@ -11163,20 +11163,6 @@ components:
DPOAlignmentConfig:
type: object
properties:
reward_scale:
type: number
description: Scaling factor for the reward signal
reward_clip:
type: number
description: >-
Maximum absolute value for reward clipping
epsilon:
type: number
description: >-
Small value added for numerical stability
gamma:
type: number
description: Discount factor for future rewards
beta:
type: number
description: Temperature parameter for the DPO loss
@ -11186,10 +11172,6 @@ components:
description: The type of loss function to use for DPO
additionalProperties: false
required:
- reward_scale
- reward_clip
- epsilon
- gamma
- beta
- loss_type
title: DPOAlignmentConfig


@ -165,7 +165,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -275,7 +275,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -265,7 +265,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -3216,19 +3216,19 @@
"INFO:datasets:Duckdb version 1.1.3 available.\n",
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
"INFO:datasets:JAX version 0.4.33 available.\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:\n"
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:\n"
]
},
{
@ -3448,7 +3448,7 @@
"\n",
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
"_ = client.initialize()"
]


@ -48,7 +48,7 @@
"outputs": [],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",


@ -661,7 +661,7 @@
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"together\")\n",
"_ = client.initialize()"


@ -35,7 +35,7 @@
],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",


@ -194,7 +194,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -1 +1 @@
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.


@ -17,7 +17,7 @@ import fire
import ruamel.yaml as yaml
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
from llama_stack.distribution.stack import LlamaStack # noqa: E402
from llama_stack.core.stack import LlamaStack # noqa: E402
from .pyopenapi.options import Options # noqa: E402
from .pyopenapi.specification import Info, Server # noqa: E402


@ -12,7 +12,7 @@ from typing import TextIO
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
from llama_stack.distribution.resolver import api_protocol_map
from llama_stack.core.resolver import api_protocol_map
from .generator import Generator
from .options import Options


@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.
## Limitations


@ -187,7 +187,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -355,7 +355,7 @@ server:
8. Run the server:
```bash
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
```
9. Test the API:


@ -103,5 +103,5 @@ llama stack run together
2. Start Streamlit UI
```bash
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
```


@ -174,7 +174,7 @@ spec:
- name: llama-stack
image: localhost/llama-stack-run-k8s:latest
imagePullPolicy: IfNotPresent
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
ports:
- containerPort: 5000
volumeMounts:


@ -59,7 +59,7 @@ Build a Llama stack container
options:
-h, --help show this help message and exit
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
be prompted to enter information interactively (default: None)
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
--list-templates Show the available templates for building a Llama Stack distribution (default: False)


@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
```
```python
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack.core.library_client import LlamaStackAsLibraryClient
client = LlamaStackAsLibraryClient(
"starter",


@ -52,7 +52,7 @@ spec:
value: "${SAFETY_MODEL}"
- name: TAVILY_SEARCH_API_KEY
value: "${TAVILY_SEARCH_API_KEY}"
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
ports:
- containerPort: 8321
volumeMounts:


@ -1,9 +1,4 @@
# External Providers Guide
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
# Creating External Providers
## Configuration
@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
2. **Inline Providers**: Providers that run locally within the Llama Stack process
## Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
### Remote Provider Specification
Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
- `provider_data_validator`: Optional validator for provider data
- `container_image`: Optional container image to use instead of pip packages
## Required Implementation
## Required Fields
## All Providers
### All Providers
All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
)
```
### Remote Providers
#### Remote Providers
Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -162,7 +146,7 @@ async def get_adapter_impl(
return OllamaInferenceAdapter(config)
```
### Inline Providers
#### Inline Providers
Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -189,7 +173,40 @@ Version: 0.1.0
Location: /path/to/venv/lib/python3.10/site-packages
```
## Example using `external_providers_dir`: Custom Ollama Provider
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
## Examples
### Example using `external_providers_dir`: Custom Ollama Provider
Here's a complete example of creating and using a custom Ollama provider:
@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
## Example using `module`: ramalama-stack
### Example using `module`: ramalama-stack
[ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
@ -267,34 +284,3 @@ additional_pip_packages:
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
The provider will now be available in Llama Stack with the type `remote::ramalama`.
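As a rough illustration of the `module` flow described above, a provider entry in a build config might look something like the sketch below. The layout is an assumption pieced together from the fields this guide mentions (`module`, `additional_pip_packages`, and the `remote::ramalama` provider type); it is not the actual ramalama-stack example.
```yaml
# Illustrative sketch only -- structure approximated, not copied from ramalama-stack
distribution_spec:
  providers:
    inference:
      - provider_type: remote::ramalama
        module: ramalama_stack        # pip package exposing get_provider_spec()
image_type: venv
additional_pip_packages: []           # any extra runtime dependencies go here
```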
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.


@ -0,0 +1,10 @@
# Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |

docs/source/providers/external/index.md (new file)

@ -0,0 +1,13 @@
# External Providers
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
```{toctree}
:maxdepth: 1
external-providers-list
external-providers-guide
```


@ -15,7 +15,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
```{toctree}
:maxdepth: 1
external
external/index
openai
inference/index
agents/index


@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
| `use_reference_model` | `<class 'bool'>` | No | True | |
| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |
## Sample Configuration


@ -66,7 +66,7 @@
"from pydantic import BaseModel\n",
"from termcolor import cprint\n",
"\n",
"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
"from llama_stack.core.datatypes import RemoteProviderConfig\n",
"from llama_stack.apis.safety import Safety\n",
"from llama_stack_client import LlamaStackClient\n",
"\n",


@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.distribution.library_client import ( # noqa: F401
from llama_stack.core.library_client import ( # noqa: F401
AsyncLlamaStackAsLibraryClient,
LlamaStackAsLibraryClient,
)


@ -193,18 +193,10 @@ class DPOLossType(Enum):
class DPOAlignmentConfig(BaseModel):
"""Configuration for Direct Preference Optimization (DPO) alignment.
:param reward_scale: Scaling factor for the reward signal
:param reward_clip: Maximum absolute value for reward clipping
:param epsilon: Small value added for numerical stability
:param gamma: Discount factor for future rewards
:param beta: Temperature parameter for the DPO loss
:param loss_type: The type of loss function to use for DPO
"""
reward_scale: float
reward_clip: float
epsilon: float
gamma: float
beta: float
loss_type: DPOLossType = DPOLossType.sigmoid
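# Illustrative sketch (not part of this change): with the reward-shaping fields gone,
# the config is built from just the DPO-specific knobs. The import path is assumed
# to be the post-training API module; 0.1 mirrors the dpo_beta default shown in the
# provider config table above.
from llama_stack.apis.post_training import DPOAlignmentConfig, DPOLossType

dpo_config = DPOAlignmentConfig(
    beta=0.1,                        # temperature for the DPO loss
    loss_type=DPOLossType.sigmoid,   # default loss type
)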


@ -323,7 +323,7 @@ def _hf_download(
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
repo_id = model.huggingface_repo
if repo_id is None:
@ -361,7 +361,7 @@ def _meta_download(
info: "LlamaDownloadInfo",
max_concurrent_downloads: int,
):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
output_dir = Path(model_local_dir(model.descriptor()))
os.makedirs(output_dir, exist_ok=True)
@ -403,7 +403,7 @@ class Manifest(BaseModel):
def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
with open(manifest_file) as f:
d = json.load(f)


@ -11,7 +11,7 @@ from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.models.llama.sku_list import all_registered_models


@ -9,7 +9,7 @@ import os
import shutil
from llama_stack.cli.subcommand import Subcommand
from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.models.llama.sku_list import resolve_model


@ -23,27 +23,27 @@ from termcolor import colored, cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.table import print_table
from llama_stack.distribution.build import (
from llama_stack.core.build import (
SERVER_DEPENDENCIES,
build_image,
get_provider_dependencies,
)
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.datatypes import (
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.stack import replace_env_vars
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.exec import formulate_run_args, run_command
from llama_stack.distribution.utils.image_types import LlamaStackImageType
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"


@ -27,7 +27,7 @@ class StackBuild(Subcommand):
"--config",
type=str,
default=None,
help="Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
)
self.parser.add_argument(


@ -26,7 +26,7 @@ class StackListApis(Subcommand):
def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
from llama_stack.cli.table import print_table
from llama_stack.distribution.distribution import stack_apis
from llama_stack.core.distribution import stack_apis
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
headers = [


@ -23,7 +23,7 @@ class StackListProviders(Subcommand):
@property
def providable_apis(self):
from llama_stack.distribution.distribution import providable_apis
from llama_stack.core.distribution import providable_apis
return [api.value for api in providable_apis()]
@ -38,7 +38,7 @@ class StackListProviders(Subcommand):
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
from llama_stack.cli.table import print_table
from llama_stack.distribution.distribution import Api, get_provider_registry
from llama_stack.core.distribution import Api, get_provider_registry
all_providers = get_provider_registry()
if args.api:


@ -85,8 +85,8 @@ class StackRun(Subcommand):
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import yaml
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.utils.exec import formulate_run_args, run_command
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.utils.exec import formulate_run_args, run_command
if args.enable_ui:
self._start_ui_development_server(args.port)
@ -94,7 +94,7 @@ class StackRun(Subcommand):
if args.config:
try:
from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
config_file = resolve_config_or_template(args.config, Mode.RUN)
except ValueError as e:
@ -127,7 +127,7 @@ class StackRun(Subcommand):
# using the current environment packages.
if not image_type and not image_name:
logger.info("No image type or image name provided. Assuming environment packages.")
from llama_stack.distribution.server.server import main as server_main
from llama_stack.core.server.server import main as server_main
# Build the server args from the current args passed to the CLI
server_args = argparse.Namespace()


@ -107,7 +107,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
console = Console()
model_dir = Path(model_local_dir(args.model_id))


@ -6,7 +6,7 @@
from typing import Any
from llama_stack.distribution.datatypes import User
from llama_stack.core.datatypes import User
from .conditions import (
Condition,


@ -12,11 +12,11 @@ from pathlib import Path
from pydantic import BaseModel
from termcolor import cprint
from llama_stack.distribution.datatypes import BuildConfig
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.utils.exec import run_command
from llama_stack.distribution.utils.image_types import LlamaStackImageType
from llama_stack.core.datatypes import BuildConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
from llama_stack.templates.template import DistributionTemplate
@ -122,7 +122,7 @@ def build_image(
normal_deps.extend(api_spec.pip_packages)
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
args = [
script,
"--template-or-config",
@ -139,7 +139,7 @@ def build_image(
if run_config is not None:
args.extend(["--run-config", run_config])
elif build_config.image_type == LlamaStackImageType.CONDA.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_conda_env.sh")
args = [
script,
"--env-name",
@ -150,7 +150,7 @@ def build_image(
" ".join(normal_deps),
]
elif build_config.image_type == LlamaStackImageType.VENV.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
args = [
script,
"--env-name",


@ -327,12 +327,12 @@ EOF
# If a run config is provided, we use the --config flag
if [[ -n "$run_config" ]]; then
add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
EOF
# If a template is provided (not a yaml file), we use the --template flag
elif [[ "$template_or_config" != *.yaml ]]; then
add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
EOF
fi


@ -7,20 +7,20 @@ import logging
import textwrap
from typing import Any
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.distribution.distribution import (
from llama_stack.core.distribution import (
builtin_automatically_routed_apis,
get_provider_registry,
)
from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config
from llama_stack.providers.datatypes import Api, ProviderSpec
logger = logging.getLogger(__name__)

View file

@ -24,7 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig

View file

@ -12,8 +12,8 @@ from typing import Any
import yaml
from pydantic import BaseModel
from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
from llama_stack.distribution.external import load_external_apis
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
from llama_stack.core.external import load_external_apis
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
AdapterSpec,

View file

@ -8,7 +8,7 @@
import yaml
from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.distribution.datatypes import BuildConfig, StackRunConfig
from llama_stack.core.datatypes import BuildConfig, StackRunConfig
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="core")

View file

@ -15,9 +15,9 @@ from llama_stack.apis.inspect import (
RouteInfo,
VersionInfo,
)
from llama_stack.distribution.datatypes import StackRunConfig
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.server.routes import get_all_api_routes
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack.providers.datatypes import HealthStatus

View file

@ -31,23 +31,23 @@ from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
from llama_stack.distribution.build import print_pip_install_help
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
from llama_stack.distribution.request_headers import (
from llama_stack.core.build import print_pip_install_help
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
from llama_stack.core.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
)
from llama_stack.distribution.resolver import ProviderRegistry
from llama_stack.distribution.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.distribution.stack import (
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.core.stack import (
construct_stack,
get_stack_run_config_from_template,
replace_env_vars,
)
from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.distribution.utils.exec import in_notebook
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.providers.utils.telemetry.tracing import (
CURRENT_TRACE_CONTEXT,
end_trace,

View file

@ -10,7 +10,7 @@ import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.distribution.datatypes import User
from llama_stack.core.datatypes import User
from .utils.dynamic import instantiate_class_type

View file

@ -27,18 +27,18 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.client import get_client_impl
from llama_stack.distribution.datatypes import (
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
AccessRule,
AutoRoutedProviderSpec,
Provider,
RoutingTableProviderSpec,
StackRunConfig,
)
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.core.distribution import builtin_automatically_routed_apis
from llama_stack.core.external import load_external_apis
from llama_stack.core.store import DistributionRegistry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
Api,
@ -183,7 +183,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
spec=RoutingTableProviderSpec(
api=info.routing_table_api,
router_api=info.router_api,
module="llama_stack.distribution.routers",
module="llama_stack.core.routers",
api_dependencies=[],
deps__=[f"inner-{info.router_api.value}"],
),
@ -197,7 +197,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
config={},
spec=AutoRoutedProviderSpec(
api=info.router_api,
module="llama_stack.distribution.routers",
module="llama_stack.core.routers",
routing_table_api=info.routing_table_api,
api_dependencies=[info.routing_table_api],
# Add telemetry as an optional dependency to all auto-routed providers

View file

@ -6,9 +6,9 @@
from typing import Any
from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
from llama_stack.distribution.stack import StackRunConfig
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.core.datatypes import AccessRule, RoutedProtocol
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
from llama_stack.providers.utils.inference.inference_store import InferenceStore

View file

@ -7,7 +7,7 @@
from typing import Any
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
BenchmarkWithOwner,
)
from llama_stack.log import get_logger

View file

@ -10,16 +10,16 @@ from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import Model
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.access_control.datatypes import Action
from llama_stack.distribution.datatypes import (
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.core.access_control.datatypes import Action
from llama_stack.core.datatypes import (
AccessRule,
RoutableObject,
RoutableObjectWithProvider,
RoutedProtocol,
)
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.core.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable

View file

@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
URIDataSource,
)
from llama_stack.apis.resource import ResourceType
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
DatasetWithOwner,
)
from llama_stack.log import get_logger

View file

@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ModelWithOwner,
RegistryEntrySource,
)

View file

@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import (
ScoringFnParams,
ScoringFunctions,
)
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ScoringFnWithOwner,
)
from llama_stack.log import get_logger

View file

@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ShieldWithOwner,
)
from llama_stack.log import get_logger

View file

@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
from llama_stack.distribution.datatypes import ToolGroupWithOwner
from llama_stack.core.datatypes import ToolGroupWithOwner
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl

View file

@ -23,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
VectorDBWithOwner,
)
from llama_stack.log import get_logger
@ -84,8 +84,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
async def unregister_vector_db(self, vector_db_id: str) -> None:
existing_vector_db = await self.get_vector_db(vector_db_id)
if existing_vector_db is None:
raise VectorStoreNotFoundError(vector_db_id)
await self.unregister_object(existing_vector_db)
async def openai_retrieve_vector_store(

View file

@ -9,10 +9,10 @@ import json
import httpx
from aiohttp import hdrs
from llama_stack.distribution.datatypes import AuthenticationConfig, User
from llama_stack.distribution.request_headers import user_from_scope
from llama_stack.distribution.server.auth_providers import create_auth_provider
from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls
from llama_stack.core.datatypes import AuthenticationConfig, User
from llama_stack.core.request_headers import user_from_scope
from llama_stack.core.server.auth_providers import create_auth_provider
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")

View file

@ -14,7 +14,7 @@ import httpx
from jose import jwt
from pydantic import BaseModel, Field
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
AuthenticationConfig,
CustomAuthConfig,
GitHubTokenAuthConfig,

View file

@ -15,7 +15,7 @@ from starlette.routing import Route
from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
from llama_stack.distribution.resolver import api_protocol_map
from llama_stack.core.resolver import api_protocol_map
from llama_stack.schema_utils import WebMethod
EndpointFunc = Callable[..., Any]

View file

@ -33,35 +33,35 @@ from pydantic import BaseModel, ValidationError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.cli.utils import add_config_template_args, get_config_from_args
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import (
from llama_stack.core.access_control.access_control import AccessDeniedError
from llama_stack.core.datatypes import (
AuthenticationRequiredError,
LoggingConfig,
StackRunConfig,
)
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.external import ExternalApiSpec, load_external_apis
from llama_stack.distribution.request_headers import (
from llama_stack.core.distribution import builtin_automatically_routed_apis
from llama_stack.core.external import ExternalApiSpec, load_external_apis
from llama_stack.core.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
user_from_scope,
)
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.server.routes import (
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.server.routes import (
find_matching_route,
get_all_api_routes,
initialize_route_impls,
)
from llama_stack.distribution.stack import (
from llama_stack.core.stack import (
cast_image_name_to_string,
construct_stack,
replace_env_vars,
shutdown_stack,
validate_env_pair,
)
from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
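Because the hunks above are a mechanical old-to-new package rename, out-of-tree code can usually be migrated the same way. Below is a hedged sketch of a bulk rewrite; the target directory, the plain string replacement, and the restriction to `.py` files are assumptions, and it deliberately does not touch the renamed shell-script paths such as `core/build_container.sh`:

```python
#!/usr/bin/env python3
"""Rewrite legacy llama_stack.distribution imports to llama_stack.core.

Minimal sketch: plain textual replacement over Python files under a given
directory. Review the result before committing; dotted paths embedded in
configs or docs may need separate handling.
"""
import sys
from pathlib import Path

OLD = "llama_stack.distribution"
NEW = "llama_stack.core"

def rewrite(root: Path) -> int:
    changed = 0
    for path in root.rglob("*.py"):
        text = path.read_text(encoding="utf-8")
        if OLD in text:
            path.write_text(text.replace(OLD, NEW), encoding="utf-8")
            changed += 1
    return changed

if __name__ == "__main__":
    root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    print(f"updated {rewrite(root)} file(s) under {root}")
```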

View file

@ -34,14 +34,14 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import Provider, StackRunConfig
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
from llama_stack.distribution.routing_tables.common import CommonRoutingTableImpl
from llama_stack.distribution.store.registry import create_dist_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api

View file

@ -122,7 +122,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
yaml_config_arg=""
fi
$PYTHON_BINARY -m llama_stack.distribution.server.server \
$PYTHON_BINARY -m llama_stack.core.server.server \
$yaml_config_arg \
--port "$port" \
$env_vars \

View file

@ -10,8 +10,8 @@ from typing import Protocol
import pydantic
from llama_stack.distribution.datatypes import RoutableObjectWithProvider
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig

View file

@ -36,7 +36,7 @@ llama-stack-client benchmarks register \
3. Start Streamlit UI
```bash
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
```
## Environment Variables

Some files were not shown because too many files have changed in this diff.