Merge branch 'main' into openai-vector-store/qdrant

commit 970d0f307f
ehhuang authored on 2025-07-31 15:49:49 -07:00 (committed via GitHub)
338 changed files with 15301 additions and 15997 deletions


@ -0,0 +1,198 @@
name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'
inputs:
test-types:
description: 'JSON array of test types to run'
required: true
stack-config:
description: 'Stack configuration to use'
required: true
provider:
description: 'Provider to use for tests'
required: true
inference-mode:
description: 'Inference mode (record or replay)'
required: true
run-vision-tests:
description: 'Whether to run vision tests'
required: false
default: 'false'
runs:
using: 'composite'
steps:
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
shell: bash
run: |
free -h
df -h
- name: Set environment variables
shell: bash
run: |
echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV
echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV
echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV
echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV
else
echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV
fi
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV
else
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV
fi
- name: Run Llama Stack Server
if: ${{ contains(inputs.stack-config, 'server:') }}
shell: bash
run: |
# Start the server once up front so the pytest runs below don't repeatedly start and stop it
echo "Starting Llama Stack Server"
nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &
echo "Waiting for Llama Stack Server to start"
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo "Llama Stack Server started"
exit 0
fi
sleep 1
done
echo "Llama Stack Server failed to start"
cat server.log
exit 1
- name: Run Integration Tests
shell: bash
run: |
stack_config="${{ inputs.stack-config }}"
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
EXTRA_PARAMS="--safety-shield=llama-guard"
else
EXTRA_PARAMS=""
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--vision-model=ollama/llama3.2-vision:11b \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
echo "✅ Tests completed for vision"
else
echo "❌ Tests failed for vision"
exit 1
fi
exit 0
fi
# Run non-vision tests
TEST_TYPES='${{ inputs.test-types }}'
echo "Test types to run: $TEST_TYPES"
# Collect all test files for the specified test types
TEST_FILES=""
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
if [ "${{ inputs.provider }}" == "vllm" ]; then
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
echo "Skipping $test_type for vllm provider"
continue
fi
fi
if [ -d "tests/integration/$test_type" ]; then
# Find all Python test files in this directory
test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
if [ -n "$test_files" ]; then
TEST_FILES="$TEST_FILES $test_files"
echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
fi
else
echo "Warning: Directory tests/integration/$test_type does not exist"
fi
done
if [ -z "$TEST_FILES" ]; then
echo "No test files found for the specified test types"
exit 1
fi
echo "=== Running all collected tests in a single pytest command ==="
echo "Total test files: $(echo $TEST_FILES | wc -w)"
if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--text-model=$TEXT_MODEL \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then
echo "✅ All tests completed successfully"
else
echo "❌ Tests failed"
exit 1
fi
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
shell: bash
run: |
free -h
df -h
- name: Commit and push recordings
if: ${{ inputs.inference-mode == 'record' }}
shell: bash
run: |
echo "Checking for recording changes"
git status --porcelain tests/integration/recordings/
if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
git commit -m "Recordings update from CI (vision)"
else
git commit -m "Recordings update from CI"
fi
git fetch origin ${{ github.event.pull_request.head.ref }}
git rebase origin/${{ github.event.pull_request.head.ref }}
echo "Rebased successfully"
git push origin HEAD:${{ github.event.pull_request.head.ref }}
echo "Pushed successfully"
else
echo "No recording changes"
fi
- name: Write inference logs to file
if: ${{ always() }}
shell: bash
run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
- name: Upload logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
path: |
*.log
retention-days: 1


@ -1,73 +0,0 @@
name: 'Run Integration Tests'
description: 'Run integration tests with configurable execution mode and provider settings'
inputs:
test-types:
description: 'Test types to run (JSON array)'
required: true
stack-config:
description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
required: true
provider:
description: 'Provider to use: "ollama" or "vllm"'
required: true
inference-mode:
description: 'Inference mode: "record" or "replay"'
required: true
outputs:
logs-path:
description: 'Path to generated log files'
value: '*.log'
runs:
using: 'composite'
steps:
- name: Run Integration Tests
env:
LLAMA_STACK_CLIENT_TIMEOUT: "300"
LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
shell: bash
run: |
stack_config="${{ inputs.stack-config }}"
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Configure provider-specific settings
if [ "${{ inputs.provider }}" == "ollama" ]; then
export OLLAMA_URL="http://0.0.0.0:11434"
export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
export SAFETY_MODEL="ollama/llama-guard3:1b"
EXTRA_PARAMS="--safety-shield=llama-guard"
else
export VLLM_URL="http://localhost:8000/v1"
export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
EXTRA_PARAMS=""
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
TEST_TYPES='${{ inputs.test-types }}'
echo "Test types to run: $TEST_TYPES"
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
if [ "${{ inputs.provider }}" == "vllm" ]; then
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
continue
fi
fi
echo "=== Running tests for: $test_type ==="
if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--text-model=$TEXT_MODEL \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
echo "✅ Tests completed for $test_type"
else
echo "❌ Tests failed for $test_type"
exit 1
fi
done


@ -1,11 +1,23 @@
name: Setup Ollama
description: Start Ollama
inputs:
run-vision-tests:
description: 'Run vision tests: "true" or "false"'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Start Ollama
shell: bash
run: |
docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
image="ollama-with-vision-model"
else
image="ollama-with-models"
fi
echo "Starting Ollama with image: $image"
docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
echo "Verifying Ollama status..."
timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'


@ -0,0 +1,51 @@
name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'
inputs:
python-version:
description: 'Python version to use'
required: true
client-version:
description: 'Client version (latest or published)'
required: true
provider:
description: 'Provider to setup (ollama or vllm)'
required: true
default: 'ollama'
run-vision-tests:
description: 'Whether to setup provider for vision tests'
required: false
default: 'false'
inference-mode:
description: 'Inference mode (record or replay)'
required: true
runs:
using: 'composite'
steps:
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: ${{ inputs.python-version }}
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
run-vision-tests: ${{ inputs.run-vision-tests }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
shell: bash
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Configure git for commits
shell: bash
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"


@ -1,6 +1,6 @@
# Llama Stack CI
Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
| Name | File | Purpose |
| ---- | ---- | ------- |
@ -8,7 +8,7 @@ Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |


@ -1,22 +1,22 @@
name: Integration Tests
name: Integration Tests (Replay)
run-name: Run the integration test suite from tests/integration
run-name: Run the integration test suite from tests/integration in replay mode
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
types: [opened, synchronize, labeled]
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- '.github/workflows/integration-tests.yml' # This workflow
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/run-integration-tests/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@ -33,31 +33,15 @@ on:
default: 'ollama'
concurrency:
# This creates three concurrency groups:
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for valid triggers with re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-replay (for valid triggers without re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-no-run (for invalid triggers that will be skipped)
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
group: >-
${{ github.workflow }}-${{ github.ref }}-${{
(github.event.action == 'opened' ||
github.event.action == 'synchronize' ||
(github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))) &&
(contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay') ||
'no-run'
}}
# Skip concurrency for pushes to main - each commit should be tested independently
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
cancel-in-progress: true
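# Illustration (annotation, not part of the workflow): a pull-request run lands in a group such as
# "Integration Tests (Replay)-refs/pull/<PR number>/merge", so a newer push cancels the in-flight run,
# while every push to main gets a unique run_id-based group and is never cancelled by a later commit.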
jobs:
discover-tests:
if: |
github.event.action == 'opened' ||
github.event.action == 'synchronize' ||
(github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
steps:
- name: Checkout repository
@ -67,94 +51,13 @@ jobs:
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
- name: Check if re-record-tests label exists
id: check-rerecord-tests
run: |
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
else
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
fi
record-tests:
# Sequential job for recording to avoid SQLite conflicts
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
- name: Setup ollama
if: ${{ inputs.test-provider == 'ollama' }}
uses: ./.github/actions/setup-ollama
- name: Setup vllm
if: ${{ inputs.test-provider == 'vllm' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Configure git for commits
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
- name: Run Integration Tests for All Types (Recording Mode)
uses: ./.github/actions/run-integration-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider }}
inference-mode: 'record'
- name: Commit and push recordings
run: |
if ! git diff --quiet tests/integration/recordings/; then
echo "Committing recordings"
git add tests/integration/recordings/
git commit -m "Update recordings"
echo "Pushing all recording commits to PR"
git push origin HEAD:${{ github.head_ref }}
else
echo "No recording changes"
fi
- name: Write inference logs to file
if: ${{ always() }}
run: |
sudo docker logs ollama > ollama-recording.log || true
- name: Upload recording logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: recording-logs-${{ github.run_id }}
path: |
*.log
retention-days: 1
run-tests:
# Skip this job if we're in recording mode (handled by record-tests job)
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
run-replay-mode-tests:
needs: discover-tests
runs-on: ubuntu-latest
@ -164,48 +67,29 @@ jobs:
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
python-version: ["3.12", "3.13"]
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: ['true', 'false']
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay'
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Run Integration Tests (Replay Mode)
uses: ./.github/actions/run-integration-tests
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Upload test logs on failure
if: ${{ failure() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
path: |
*.log
retention-days: 1
run-vision-tests: ${{ matrix.run-vision-tests }}


@ -9,8 +9,8 @@ on:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
@ -19,8 +19,8 @@ on:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
@ -108,7 +108,7 @@ jobs:
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi
@ -142,7 +142,7 @@ jobs:
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi


@ -12,12 +12,13 @@ on:
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- 'tests/external/*'
- '.github/workflows/test-external-provider-module.yml' # This workflow
jobs:
test-external-providers-from-module:
# This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
if: false
runs-on: ubuntu-latest
strategy:
matrix:
@ -47,7 +48,7 @@ jobs:
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'


@ -43,11 +43,11 @@ jobs:
- name: Print distro dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'


@ -1,7 +1,7 @@
include pyproject.toml
include llama_stack/models/llama/llama3/tokenizer.model
include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/distribution/*.sh
include llama_stack.core/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/templates/*/*.yaml
include llama_stack/providers/tests/test_cases/inference/*.json


@ -6,7 +6,6 @@
[![Discord](https://img.shields.io/discord/1257833999603335178?color=6A7EC2&logo=discord&logoColor=ffffff)](https://discord.gg/llama-stack)
[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
![coverage badge](./coverage.svg)
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)


@ -15078,22 +15078,6 @@
"DPOAlignmentConfig": {
"type": "object",
"properties": {
"reward_scale": {
"type": "number",
"description": "Scaling factor for the reward signal"
},
"reward_clip": {
"type": "number",
"description": "Maximum absolute value for reward clipping"
},
"epsilon": {
"type": "number",
"description": "Small value added for numerical stability"
},
"gamma": {
"type": "number",
"description": "Discount factor for future rewards"
},
"beta": {
"type": "number",
"description": "Temperature parameter for the DPO loss"
@ -15106,10 +15090,6 @@
},
"additionalProperties": false,
"required": [
"reward_scale",
"reward_clip",
"epsilon",
"gamma",
"beta",
"loss_type"
],


@ -11163,20 +11163,6 @@ components:
DPOAlignmentConfig:
type: object
properties:
reward_scale:
type: number
description: Scaling factor for the reward signal
reward_clip:
type: number
description: >-
Maximum absolute value for reward clipping
epsilon:
type: number
description: >-
Small value added for numerical stability
gamma:
type: number
description: Discount factor for future rewards
beta:
type: number
description: Temperature parameter for the DPO loss
@ -11186,10 +11172,6 @@ components:
description: The type of loss function to use for DPO
additionalProperties: false
required:
- reward_scale
- reward_clip
- epsilon
- gamma
- beta
- loss_type
title: DPOAlignmentConfig


@ -165,7 +165,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -275,7 +275,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -265,7 +265,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -3216,19 +3216,19 @@
"INFO:datasets:Duckdb version 1.1.3 available.\n",
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
"INFO:datasets:JAX version 0.4.33 available.\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:\n"
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:\n"
]
},
{
@ -3448,7 +3448,7 @@
"\n",
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
"_ = client.initialize()"
]


@ -48,7 +48,7 @@
"outputs": [],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",


@ -661,7 +661,7 @@
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"together\")\n",
"_ = client.initialize()"


@ -35,7 +35,7 @@
],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",


@ -194,7 +194,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"


@ -1 +1 @@
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.


@ -17,7 +17,7 @@ import fire
import ruamel.yaml as yaml
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
from llama_stack.distribution.stack import LlamaStack # noqa: E402
from llama_stack.core.stack import LlamaStack # noqa: E402
from .pyopenapi.options import Options # noqa: E402
from .pyopenapi.specification import Info, Server # noqa: E402


@ -12,7 +12,7 @@ from typing import TextIO
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
from llama_stack.distribution.resolver import api_protocol_map
from llama_stack.core.resolver import api_protocol_map
from .generator import Generator
from .options import Options


@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.
## Limitations


@ -187,7 +187,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{


@ -355,7 +355,7 @@ server:
8. Run the server:
```bash
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
```
9. Test the API:


@ -103,5 +103,5 @@ llama stack run together
2. Start Streamlit UI
```bash
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
```


@ -174,7 +174,7 @@ spec:
- name: llama-stack
image: localhost/llama-stack-run-k8s:latest
imagePullPolicy: IfNotPresent
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
ports:
- containerPort: 5000
volumeMounts:


@ -59,7 +59,7 @@ Build a Llama stack container
options:
-h, --help show this help message and exit
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
be prompted to enter information interactively (default: None)
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
--list-templates Show the available templates for building a Llama Stack distribution (default: False)


@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
```
```python
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack.core.library_client import LlamaStackAsLibraryClient
client = LlamaStackAsLibraryClient(
"starter",


@ -52,7 +52,7 @@ spec:
value: "${SAFETY_MODEL}"
- name: TAVILY_SEARCH_API_KEY
value: "${TAVILY_SEARCH_API_KEY}"
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
ports:
- containerPort: 8321
volumeMounts:


@ -1,9 +1,4 @@
# External Providers Guide
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
# Creating External Providers
## Configuration
@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
2. **Inline Providers**: Providers that run locally within the Llama Stack process
## Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
### Remote Provider Specification
Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
- `provider_data_validator`: Optional validator for provider data
- `container_image`: Optional container image to use instead of pip packages
## Required Implementation
## Required Fields
## All Providers
### All Providers
All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
)
```
### Remote Providers
#### Remote Providers
Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -162,7 +146,7 @@ async def get_adapter_impl(
return OllamaInferenceAdapter(config)
```
### Inline Providers
#### Inline Providers
Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -189,7 +173,40 @@ Version: 0.1.0
Location: /path/to/venv/lib/python3.10/site-packages
```
## Example using `external_providers_dir`: Custom Ollama Provider
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
## Examples
### Example using `external_providers_dir`: Custom Ollama Provider
Here's a complete example of creating and using a custom Ollama provider:
@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
## Example using `module`: ramalama-stack
### Example using `module`: ramalama-stack
[ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
@ -267,34 +284,3 @@ additional_pip_packages:
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
The provider will now be available in Llama Stack with the type `remote::ramalama`.
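As a rough illustration of the `module` flow described above, a provider entry in a build config might look something like the sketch below. The layout is an assumption pieced together from the fields this guide mentions (`module`, `additional_pip_packages`, and the `remote::ramalama` provider type); it is not the actual ramalama-stack example.
```yaml
# Illustrative sketch only -- structure approximated, not copied from ramalama-stack
distribution_spec:
  providers:
    inference:
      - provider_type: remote::ramalama
        module: ramalama_stack        # pip package exposing get_provider_spec()
image_type: venv
additional_pip_packages: []           # any extra runtime dependencies go here
```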
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.


@ -0,0 +1,10 @@
# Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |

docs/source/providers/external/index.md (new file)

@ -0,0 +1,13 @@
# External Providers
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
```{toctree}
:maxdepth: 1
external-providers-list
external-providers-guide
```


@ -15,7 +15,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
```{toctree}
:maxdepth: 1
external
external/index
openai
inference/index
agents/index


@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
| `use_reference_model` | `<class 'bool'>` | No | True | |
| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |
## Sample Configuration


@ -66,7 +66,7 @@
"from pydantic import BaseModel\n",
"from termcolor import cprint\n",
"\n",
"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
"from llama_stack.core.datatypes import RemoteProviderConfig\n",
"from llama_stack.apis.safety import Safety\n",
"from llama_stack_client import LlamaStackClient\n",
"\n",


@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.distribution.library_client import ( # noqa: F401
from llama_stack.core.library_client import ( # noqa: F401
AsyncLlamaStackAsLibraryClient,
LlamaStackAsLibraryClient,
)


@ -193,18 +193,10 @@ class DPOLossType(Enum):
class DPOAlignmentConfig(BaseModel):
"""Configuration for Direct Preference Optimization (DPO) alignment.
:param reward_scale: Scaling factor for the reward signal
:param reward_clip: Maximum absolute value for reward clipping
:param epsilon: Small value added for numerical stability
:param gamma: Discount factor for future rewards
:param beta: Temperature parameter for the DPO loss
:param loss_type: The type of loss function to use for DPO
"""
reward_scale: float
reward_clip: float
epsilon: float
gamma: float
beta: float
loss_type: DPOLossType = DPOLossType.sigmoid
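# Illustrative sketch (not part of this change): with the reward-shaping fields gone,
# the config is built from just the DPO-specific knobs. The import path is assumed
# to be the post-training API module; 0.1 mirrors the dpo_beta default shown in the
# provider config table above.
from llama_stack.apis.post_training import DPOAlignmentConfig, DPOLossType

dpo_config = DPOAlignmentConfig(
    beta=0.1,                        # temperature for the DPO loss
    loss_type=DPOLossType.sigmoid,   # default loss type
)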


@ -323,7 +323,7 @@ def _hf_download(
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
repo_id = model.huggingface_repo
if repo_id is None:
@ -361,7 +361,7 @@ def _meta_download(
info: "LlamaDownloadInfo",
max_concurrent_downloads: int,
):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
output_dir = Path(model_local_dir(model.descriptor()))
os.makedirs(output_dir, exist_ok=True)
@ -403,7 +403,7 @@ class Manifest(BaseModel):
def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
with open(manifest_file) as f:
d = json.load(f)


@ -11,7 +11,7 @@ from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.models.llama.sku_list import all_registered_models


@ -9,7 +9,7 @@ import os
import shutil
from llama_stack.cli.subcommand import Subcommand
from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.models.llama.sku_list import resolve_model


@ -23,27 +23,27 @@ from termcolor import colored, cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.table import print_table
from llama_stack.distribution.build import (
from llama_stack.core.build import (
SERVER_DEPENDENCIES,
build_image,
get_provider_dependencies,
)
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.datatypes import (
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.stack import replace_env_vars
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.exec import formulate_run_args, run_command
from llama_stack.distribution.utils.image_types import LlamaStackImageType
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"


@ -27,7 +27,7 @@ class StackBuild(Subcommand):
"--config",
type=str,
default=None,
help="Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
)
self.parser.add_argument(


@ -26,7 +26,7 @@ class StackListApis(Subcommand):
def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
from llama_stack.cli.table import print_table
from llama_stack.distribution.distribution import stack_apis
from llama_stack.core.distribution import stack_apis
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
headers = [


@ -23,7 +23,7 @@ class StackListProviders(Subcommand):
@property
def providable_apis(self):
from llama_stack.distribution.distribution import providable_apis
from llama_stack.core.distribution import providable_apis
return [api.value for api in providable_apis()]
@ -38,7 +38,7 @@ class StackListProviders(Subcommand):
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
from llama_stack.cli.table import print_table
from llama_stack.distribution.distribution import Api, get_provider_registry
from llama_stack.core.distribution import Api, get_provider_registry
all_providers = get_provider_registry()
if args.api:


@ -85,8 +85,8 @@ class StackRun(Subcommand):
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import yaml
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.utils.exec import formulate_run_args, run_command
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.utils.exec import formulate_run_args, run_command
if args.enable_ui:
self._start_ui_development_server(args.port)
@ -94,7 +94,7 @@ class StackRun(Subcommand):
if args.config:
try:
from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
config_file = resolve_config_or_template(args.config, Mode.RUN)
except ValueError as e:
@ -127,7 +127,7 @@ class StackRun(Subcommand):
# using the current environment packages.
if not image_type and not image_name:
logger.info("No image type or image name provided. Assuming environment packages.")
from llama_stack.distribution.server.server import main as server_main
from llama_stack.core.server.server import main as server_main
# Build the server args from the current args passed to the CLI
server_args = argparse.Namespace()


@ -107,7 +107,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.core.utils.model_utils import model_local_dir
console = Console()
model_dir = Path(model_local_dir(args.model_id))


@ -6,7 +6,7 @@
from typing import Any
from llama_stack.distribution.datatypes import User
from llama_stack.core.datatypes import User
from .conditions import (
Condition,


@ -12,11 +12,11 @@ from pathlib import Path
from pydantic import BaseModel
from termcolor import cprint
from llama_stack.distribution.datatypes import BuildConfig
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.utils.exec import run_command
from llama_stack.distribution.utils.image_types import LlamaStackImageType
from llama_stack.core.datatypes import BuildConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
from llama_stack.templates.template import DistributionTemplate
@ -122,7 +122,7 @@ def build_image(
normal_deps.extend(api_spec.pip_packages)
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
args = [
script,
"--template-or-config",
@ -139,7 +139,7 @@ def build_image(
if run_config is not None:
args.extend(["--run-config", run_config])
elif build_config.image_type == LlamaStackImageType.CONDA.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_conda_env.sh")
args = [
script,
"--env-name",
@ -150,7 +150,7 @@ def build_image(
" ".join(normal_deps),
]
elif build_config.image_type == LlamaStackImageType.VENV.value:
script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
args = [
script,
"--env-name",


@ -327,12 +327,12 @@ EOF
# If a run config is provided, we use the --config flag
if [[ -n "$run_config" ]]; then
add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
EOF
# If a template is provided (not a yaml file), we use the --template flag
elif [[ "$template_or_config" != *.yaml ]]; then
add_to_container << EOF
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
EOF
fi


@ -7,20 +7,20 @@ import logging
import textwrap
from typing import Any
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.distribution.distribution import (
from llama_stack.core.distribution import (
builtin_automatically_routed_apis,
get_provider_registry,
)
from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config
from llama_stack.providers.datatypes import Api, ProviderSpec
logger = logging.getLogger(__name__)

View file

@ -24,7 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig

View file

@ -12,8 +12,8 @@ from typing import Any
import yaml
from pydantic import BaseModel
from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
from llama_stack.distribution.external import load_external_apis
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
from llama_stack.core.external import load_external_apis
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
AdapterSpec,

View file

@ -8,7 +8,7 @@
import yaml
from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.distribution.datatypes import BuildConfig, StackRunConfig
from llama_stack.core.datatypes import BuildConfig, StackRunConfig
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="core")

View file

@ -15,9 +15,9 @@ from llama_stack.apis.inspect import (
RouteInfo,
VersionInfo,
)
from llama_stack.distribution.datatypes import StackRunConfig
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.server.routes import get_all_api_routes
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack.providers.datatypes import HealthStatus

View file

@ -31,23 +31,23 @@ from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
from llama_stack.distribution.build import print_pip_install_help
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
from llama_stack.distribution.request_headers import (
from llama_stack.core.build import print_pip_install_help
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
from llama_stack.core.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
)
from llama_stack.distribution.resolver import ProviderRegistry
from llama_stack.distribution.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.distribution.stack import (
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.core.stack import (
construct_stack,
get_stack_run_config_from_template,
replace_env_vars,
)
from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.distribution.utils.exec import in_notebook
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.providers.utils.telemetry.tracing import (
CURRENT_TRACE_CONTEXT,
end_trace,

View file

@ -10,7 +10,7 @@ import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.distribution.datatypes import User
from llama_stack.core.datatypes import User
from .utils.dynamic import instantiate_class_type

View file

@ -27,18 +27,18 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.client import get_client_impl
from llama_stack.distribution.datatypes import (
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
AccessRule,
AutoRoutedProviderSpec,
Provider,
RoutingTableProviderSpec,
StackRunConfig,
)
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.external import load_external_apis
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.core.distribution import builtin_automatically_routed_apis
from llama_stack.core.external import load_external_apis
from llama_stack.core.store import DistributionRegistry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
Api,
@ -183,7 +183,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
spec=RoutingTableProviderSpec(
api=info.routing_table_api,
router_api=info.router_api,
module="llama_stack.distribution.routers",
module="llama_stack.core.routers",
api_dependencies=[],
deps__=[f"inner-{info.router_api.value}"],
),
@ -197,7 +197,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
config={},
spec=AutoRoutedProviderSpec(
api=info.router_api,
module="llama_stack.distribution.routers",
module="llama_stack.core.routers",
routing_table_api=info.routing_table_api,
api_dependencies=[info.routing_table_api],
# Add telemetry as an optional dependency to all auto-routed providers

View file

@ -6,9 +6,9 @@
from typing import Any
from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
from llama_stack.distribution.stack import StackRunConfig
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.core.datatypes import AccessRule, RoutedProtocol
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
from llama_stack.providers.utils.inference.inference_store import InferenceStore

View file

@ -7,7 +7,7 @@
from typing import Any
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
BenchmarkWithOwner,
)
from llama_stack.log import get_logger

View file

@ -10,16 +10,16 @@ from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import Model
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.access_control.datatypes import Action
from llama_stack.distribution.datatypes import (
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.core.access_control.datatypes import Action
from llama_stack.core.datatypes import (
AccessRule,
RoutableObject,
RoutableObjectWithProvider,
RoutedProtocol,
)
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.core.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable

View file

@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
URIDataSource,
)
from llama_stack.apis.resource import ResourceType
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
DatasetWithOwner,
)
from llama_stack.log import get_logger

View file

@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ModelWithOwner,
RegistryEntrySource,
)

View file

@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import (
ScoringFnParams,
ScoringFunctions,
)
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ScoringFnWithOwner,
)
from llama_stack.log import get_logger

View file

@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
ShieldWithOwner,
)
from llama_stack.log import get_logger

View file

@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
from llama_stack.distribution.datatypes import ToolGroupWithOwner
from llama_stack.core.datatypes import ToolGroupWithOwner
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl

View file

@ -23,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
VectorDBWithOwner,
)
from llama_stack.log import get_logger
@ -84,8 +84,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
async def unregister_vector_db(self, vector_db_id: str) -> None:
existing_vector_db = await self.get_vector_db(vector_db_id)
if existing_vector_db is None:
raise VectorStoreNotFoundError(vector_db_id)
await self.unregister_object(existing_vector_db)
async def openai_retrieve_vector_store(

View file

@ -9,10 +9,10 @@ import json
import httpx
from aiohttp import hdrs
from llama_stack.distribution.datatypes import AuthenticationConfig, User
from llama_stack.distribution.request_headers import user_from_scope
from llama_stack.distribution.server.auth_providers import create_auth_provider
from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls
from llama_stack.core.datatypes import AuthenticationConfig, User
from llama_stack.core.request_headers import user_from_scope
from llama_stack.core.server.auth_providers import create_auth_provider
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")

View file

@ -14,7 +14,7 @@ import httpx
from jose import jwt
from pydantic import BaseModel, Field
from llama_stack.distribution.datatypes import (
from llama_stack.core.datatypes import (
AuthenticationConfig,
CustomAuthConfig,
GitHubTokenAuthConfig,

View file

@ -15,7 +15,7 @@ from starlette.routing import Route
from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
from llama_stack.distribution.resolver import api_protocol_map
from llama_stack.core.resolver import api_protocol_map
from llama_stack.schema_utils import WebMethod
EndpointFunc = Callable[..., Any]

View file

@ -33,35 +33,35 @@ from pydantic import BaseModel, ValidationError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.cli.utils import add_config_template_args, get_config_from_args
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import (
from llama_stack.core.access_control.access_control import AccessDeniedError
from llama_stack.core.datatypes import (
AuthenticationRequiredError,
LoggingConfig,
StackRunConfig,
)
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.external import ExternalApiSpec, load_external_apis
from llama_stack.distribution.request_headers import (
from llama_stack.core.distribution import builtin_automatically_routed_apis
from llama_stack.core.external import ExternalApiSpec, load_external_apis
from llama_stack.core.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
user_from_scope,
)
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.server.routes import (
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.server.routes import (
find_matching_route,
get_all_api_routes,
initialize_route_impls,
)
from llama_stack.distribution.stack import (
from llama_stack.core.stack import (
cast_image_name_to_string,
construct_stack,
replace_env_vars,
shutdown_stack,
validate_env_pair,
)
from llama_stack.distribution.utils.config import redact_sensitive_fields
from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.distribution.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
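Because the hunks above are a mechanical old-to-new package rename, out-of-tree code can usually be migrated the same way. Below is a hedged sketch of a bulk rewrite; the target directory, the plain string replacement, and the restriction to `.py` files are assumptions, and it deliberately does not touch the renamed shell-script paths such as `core/build_container.sh`:

```python
#!/usr/bin/env python3
"""Rewrite legacy llama_stack.distribution imports to llama_stack.core.

Minimal sketch: plain textual replacement over Python files under a given
directory. Review the result before committing; dotted paths embedded in
configs or docs may need separate handling.
"""
import sys
from pathlib import Path

OLD = "llama_stack.distribution"
NEW = "llama_stack.core"

def rewrite(root: Path) -> int:
    changed = 0
    for path in root.rglob("*.py"):
        text = path.read_text(encoding="utf-8")
        if OLD in text:
            path.write_text(text.replace(OLD, NEW), encoding="utf-8")
            changed += 1
    return changed

if __name__ == "__main__":
    root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    print(f"updated {rewrite(root)} file(s) under {root}")
```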

View file

@ -34,14 +34,14 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import Provider, StackRunConfig
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
from llama_stack.distribution.routing_tables.common import CommonRoutingTableImpl
from llama_stack.distribution.store.registry import create_dist_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api

View file

@ -122,7 +122,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
yaml_config_arg=""
fi
$PYTHON_BINARY -m llama_stack.distribution.server.server \
$PYTHON_BINARY -m llama_stack.core.server.server \
$yaml_config_arg \
--port "$port" \
$env_vars \

View file

@ -10,8 +10,8 @@ from typing import Protocol
import pydantic
from llama_stack.distribution.datatypes import RoutableObjectWithProvider
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig

View file

@ -36,7 +36,7 @@ llama-stack-client benchmarks register \
3. Start Streamlit UI
```bash
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
```
## Environment Variables

Some files were not shown because too many files have changed in this diff.