Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-15 14:08:00 +00:00)

Commit 970d0f307f: Merge branch 'main' into openai-vector-store/qdrant
338 changed files with 15301 additions and 15997 deletions
.github/actions/run-and-record-tests/action.yml (new file, 198 lines)
@@ -0,0 +1,198 @@

name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'

inputs:
  test-types:
    description: 'JSON array of test types to run'
    required: true
  stack-config:
    description: 'Stack configuration to use'
    required: true
  provider:
    description: 'Provider to use for tests'
    required: true
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true
  run-vision-tests:
    description: 'Whether to run vision tests'
    required: false
    default: 'false'

runs:
  using: 'composite'
  steps:
    - name: Check Storage and Memory Available Before Tests
      if: ${{ always() }}
      shell: bash
      run: |
        free -h
        df -h

    - name: Set environment variables
      shell: bash
      run: |
        echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV
        echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV
          echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV
          echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV
        else
          echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
          echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV
        fi

        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
          echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV
        else
          echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV
        fi

    - name: Run Llama Stack Server
      if: ${{ contains(inputs.stack-config, 'server:') }}
      shell: bash
      run: |
        # Run this so pytest in a loop doesn't start-stop servers in a loop
        echo "Starting Llama Stack Server"
        nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &

        echo "Waiting for Llama Stack Server to start"
        for i in {1..30}; do
          if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
            echo "Llama Stack Server started"
            exit 0
          fi
          sleep 1
        done

        echo "Llama Stack Server failed to start"
        cat server.log
        exit 1

    - name: Run Integration Tests
      shell: bash
      run: |
        stack_config="${{ inputs.stack-config }}"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          EXTRA_PARAMS="--safety-shield=llama-guard"
        else
          EXTRA_PARAMS=""
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
          if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --vision-model=ollama/llama3.2-vision:11b \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes ${EXTRA_PARAMS} \
            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
            echo "✅ Tests completed for vision"
          else
            echo "❌ Tests failed for vision"
            exit 1
          fi

          exit 0
        fi

        # Run non-vision tests
        TEST_TYPES='${{ inputs.test-types }}'
        echo "Test types to run: $TEST_TYPES"

        # Collect all test files for the specified test types
        TEST_FILES=""
        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [ "${{ inputs.provider }}" == "vllm" ]; then
            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
              echo "Skipping $test_type for vllm provider"
              continue
            fi
          fi

          if [ -d "tests/integration/$test_type" ]; then
            # Find all Python test files in this directory
            test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
            if [ -n "$test_files" ]; then
              TEST_FILES="$TEST_FILES $test_files"
              echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
            fi
          else
            echo "Warning: Directory tests/integration/$test_type does not exist"
          fi
        done

        if [ -z "$TEST_FILES" ]; then
          echo "No test files found for the specified test types"
          exit 1
        fi

        echo "=== Running all collected tests in a single pytest command ==="
        echo "Total test files: $(echo $TEST_FILES | wc -w)"

        if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \
          -k "not( ${EXCLUDE_TESTS} )" \
          --text-model=$TEXT_MODEL \
          --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
          --color=yes ${EXTRA_PARAMS} \
          --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then
          echo "✅ All tests completed successfully"
        else
          echo "❌ Tests failed"
          exit 1
        fi

    - name: Check Storage and Memory Available After Tests
      if: ${{ always() }}
      shell: bash
      run: |
        free -h
        df -h

    - name: Commit and push recordings
      if: ${{ inputs.inference-mode == 'record' }}
      shell: bash
      run: |
        echo "Checking for recording changes"
        git status --porcelain tests/integration/recordings/

        if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
          echo "New recordings detected, committing and pushing"
          git add tests/integration/recordings/

          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
            git commit -m "Recordings update from CI (vision)"
          else
            git commit -m "Recordings update from CI"
          fi

          git fetch origin ${{ github.event.pull_request.head.ref }}
          git rebase origin/${{ github.event.pull_request.head.ref }}
          echo "Rebased successfully"
          git push origin HEAD:${{ github.event.pull_request.head.ref }}
          echo "Pushed successfully"
        else
          echo "No recording changes"
        fi

    - name: Write inference logs to file
      if: ${{ always() }}
      shell: bash
      run: |
        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true

    - name: Upload logs
      if: ${{ always() }}
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
      with:
        name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
        path: |
          *.log
        retention-days: 1
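For orientation (not part of the commit): a workflow job invokes this composite action roughly as in the sketch below. The input values shown are placeholders; the actual workflow later in this diff supplies them from its job matrix and the discover-tests job.

```yaml
# Sketch only: calling the composite action from a job (placeholder values).
jobs:
  replay-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4  # placeholder ref; the real workflow pins a SHA

      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: '["inference", "agents"]'  # JSON array, matching the test-types input
          stack-config: 'server:ci-tests'        # a 'server:' prefix makes the action start a server first
          provider: 'ollama'
          inference-mode: 'replay'
          run-vision-tests: 'false'
```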
.github/actions/run-integration-tests/action.yml (deleted file, 73 lines)
@@ -1,73 +0,0 @@

name: 'Run Integration Tests'
description: 'Run integration tests with configurable execution mode and provider settings'

inputs:
  test-types:
    description: 'Test types to run (JSON array)'
    required: true
  stack-config:
    description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
    required: true
  provider:
    description: 'Provider to use: "ollama" or "vllm"'
    required: true
  inference-mode:
    description: 'Inference mode: "record" or "replay"'
    required: true

outputs:
  logs-path:
    description: 'Path to generated log files'
    value: '*.log'

runs:
  using: 'composite'
  steps:
    - name: Run Integration Tests
      env:
        LLAMA_STACK_CLIENT_TIMEOUT: "300"
        LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
        LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
      shell: bash
      run: |
        stack_config="${{ inputs.stack-config }}"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          export OLLAMA_URL="http://0.0.0.0:11434"
          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
          export SAFETY_MODEL="ollama/llama-guard3:1b"
          EXTRA_PARAMS="--safety-shield=llama-guard"
        else
          export VLLM_URL="http://localhost:8000/v1"
          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
          EXTRA_PARAMS=""
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        TEST_TYPES='${{ inputs.test-types }}'
        echo "Test types to run: $TEST_TYPES"

        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [ "${{ inputs.provider }}" == "vllm" ]; then
            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
              continue
            fi
          fi

          echo "=== Running tests for: $test_type ==="

          if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --text-model=$TEXT_MODEL \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes ${EXTRA_PARAMS} \
            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
            echo "✅ Tests completed for $test_type"
          else
            echo "❌ Tests failed for $test_type"
            exit 1
          fi
        done
.github/actions/setup-ollama/action.yml
@@ -1,11 +1,23 @@
 name: Setup Ollama
 description: Start Ollama
+inputs:
+  run-vision-tests:
+    description: 'Run vision tests: "true" or "false"'
+    required: false
+    default: 'false'
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
+        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+          image="ollama-with-vision-model"
+        else
+          image="ollama-with-models"
+        fi
+
+        echo "Starting Ollama with image: $image"
+        docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
         echo "Verifying Ollama status..."
         timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
.github/actions/setup-test-environment/action.yml (new file, 51 lines)
@@ -0,0 +1,51 @@

name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'

inputs:
  python-version:
    description: 'Python version to use'
    required: true
  client-version:
    description: 'Client version (latest or published)'
    required: true
  provider:
    description: 'Provider to setup (ollama or vllm)'
    required: true
    default: 'ollama'
  run-vision-tests:
    description: 'Whether to setup provider for vision tests'
    required: false
    default: 'false'
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true

runs:
  using: 'composite'
  steps:
    - name: Install dependencies
      uses: ./.github/actions/setup-runner
      with:
        python-version: ${{ inputs.python-version }}
        client-version: ${{ inputs.client-version }}

    - name: Setup ollama
      if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-ollama
      with:
        run-vision-tests: ${{ inputs.run-vision-tests }}

    - name: Setup vllm
      if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-vllm

    - name: Build Llama Stack
      shell: bash
      run: |
        uv run llama stack build --template ci-tests --image-type venv

    - name: Configure git for commits
      shell: bash
      run: |
        git config --local user.email "github-actions[bot]@users.noreply.github.com"
        git config --local user.name "github-actions[bot]"
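Usage note (not part of the commit): a job typically runs this action before `run-and-record-tests`. A minimal sketch with placeholder values is below; with `inference-mode: 'replay'` the "Setup ollama" / "Setup vllm" steps above are skipped entirely, since both are gated on `inference-mode == 'record'`.

```yaml
# Sketch only: a recording job prepares its environment like this (placeholder values).
- name: Setup test environment
  uses: ./.github/actions/setup-test-environment
  with:
    python-version: '3.12'
    client-version: 'latest'
    provider: 'ollama'
    run-vision-tests: 'false'
    inference-mode: 'record'   # 'record' starts the provider; 'replay' skips provider setup
```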
.github/workflows/README.md
@@ -1,6 +1,6 @@
 # Llama Stack CI
 
-Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
+Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
 
 | Name | File | Purpose |
 | ---- | ---- | ------- |
@@ -8,7 +8,7 @@
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
.github/workflows/integration-tests.yml
@@ -1,22 +1,22 @@
-name: Integration Tests
+name: Integration Tests (Replay)
 
-run-name: Run the integration test suite from tests/integration
+run-name: Run the integration test suite from tests/integration in replay mode
 
 on:
   push:
     branches: [ main ]
   pull_request:
     branches: [ main ]
-    types: [opened, synchronize, labeled]
+    types: [opened, synchronize, reopened]
     paths:
       - 'llama_stack/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
-      - '.github/actions/run-integration-tests/action.yml'
+      - '.github/actions/setup-test-environment/action.yml'
+      - '.github/actions/run-and-record-tests/action.yml'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@@ -33,31 +33,15 @@ on:
       default: 'ollama'
 
 concurrency:
-  # This creates three concurrency groups:
-  # ${{ github.workflow }}-${{ github.ref }}-rerecord (for valid triggers with re-record-tests label)
-  # ${{ github.workflow }}-${{ github.ref }}-replay (for valid triggers without re-record-tests label)
-  # ${{ github.workflow }}-${{ github.ref }}-no-run (for invalid triggers that will be skipped)
-  # The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
-  group: >-
-    ${{ github.workflow }}-${{ github.ref }}-${{
-      (github.event.action == 'opened' ||
       github.event.action == 'synchronize' ||
-       (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))) &&
-      (contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay') ||
-      'no-run'
-    }}
+  # Skip concurrency for pushes to main - each commit should be tested independently
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true
 
 jobs:
   discover-tests:
-    if: |
-      github.event.action == 'opened' ||
-      github.event.action == 'synchronize' ||
-      (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
     runs-on: ubuntu-latest
     outputs:
       test-types: ${{ steps.generate-test-types.outputs.test-types }}
-      rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
 
     steps:
       - name: Checkout repository
@@ -67,94 +51,13 @@ jobs:
         id: generate-test-types
         run: |
           # Get test directories dynamically, excluding non-test directories
+          # NOTE: we are excluding post_training since the tests take too long
           TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
+            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
             sort | jq -R -s -c 'split("\n")[:-1]')
           echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
 
-      - name: Check if re-record-tests label exists
-        id: check-rerecord-tests
-        run: |
-          if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
-            echo "rerecord-tests=true" >> $GITHUB_OUTPUT
-          else
-            echo "rerecord-tests=false" >> $GITHUB_OUTPUT
-          fi
-
-  record-tests:
-    # Sequential job for recording to avoid SQLite conflicts
-    if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
-    needs: discover-tests
-    runs-on: ubuntu-latest
-
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Install dependencies
-        uses: ./.github/actions/setup-runner
-        with:
-          python-version: "3.12" # Use single Python version for recording
-          client-version: "latest"
-
-      - name: Setup ollama
-        if: ${{ inputs.test-provider == 'ollama' }}
-        uses: ./.github/actions/setup-ollama
-
-      - name: Setup vllm
-        if: ${{ inputs.test-provider == 'vllm' }}
-        uses: ./.github/actions/setup-vllm
-
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ci-tests --image-type venv
-
-      - name: Configure git for commits
-        run: |
-          git config --local user.email "github-actions[bot]@users.noreply.github.com"
-          git config --local user.name "github-actions[bot]"
-
-      - name: Run Integration Tests for All Types (Recording Mode)
-        uses: ./.github/actions/run-integration-tests
-        with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
-          stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider }}
-          inference-mode: 'record'
-
-      - name: Commit and push recordings
-        run: |
-          if ! git diff --quiet tests/integration/recordings/; then
-            echo "Committing recordings"
-            git add tests/integration/recordings/
-            git commit -m "Update recordings"
-            echo "Pushing all recording commits to PR"
-            git push origin HEAD:${{ github.head_ref }}
-          else
-            echo "No recording changes"
-          fi
-
-      - name: Write inference logs to file
-        if: ${{ always() }}
-        run: |
-          sudo docker logs ollama > ollama-recording.log || true
-
-      - name: Upload recording logs
-        if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: recording-logs-${{ github.run_id }}
-          path: |
-            *.log
-          retention-days: 1
-
-  run-tests:
-    # Skip this job if we're in recording mode (handled by record-tests job)
-    if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
+  run-replay-mode-tests:
     needs: discover-tests
     runs-on: ubuntu-latest
 
@@ -164,48 +67,29 @@ jobs:
         client-type: [library, server]
         # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
         provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
-        python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
+        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
+        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        run-vision-tests: ['true', 'false']
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
-      - name: Install dependencies
-        uses: ./.github/actions/setup-runner
+      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
+          provider: ${{ matrix.provider }}
+          run-vision-tests: ${{ matrix.run-vision-tests }}
+          inference-mode: 'replay'
 
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ci-tests --image-type venv
-
-      - name: Check Storage and Memory Available Before Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Run Integration Tests (Replay Mode)
-        uses: ./.github/actions/run-integration-tests
+      - name: Run tests
+        uses: ./.github/actions/run-and-record-tests
         with:
           test-types: ${{ needs.discover-tests.outputs.test-types }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           provider: ${{ matrix.provider }}
           inference-mode: 'replay'
-
-      - name: Check Storage and Memory Available After Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Upload test logs on failure
-        if: ${{ failure() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
-          path: |
-            *.log
-          retention-days: 1
+          run-vision-tests: ${{ matrix.run-vision-tests }}
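A brief aside on the concurrency change above (not part of the commit): the single-expression group resolves differently for pushes to main than for pull requests. A sketch, with the resulting group values shown as comments (run and PR numbers are placeholders):

```yaml
# Sketch: how the new concurrency expression behaves (placeholder run/PR numbers).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true
  # push to main:       group = "Integration Tests (Replay)-<run_id>"             -> unique per run, main builds are never cancelled
  # pull_request event: group = "Integration Tests (Replay)-refs/pull/<n>/merge"  -> a new push cancels the in-flight run for that PR
```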
.github/workflows/providers-build.yml
@@ -9,8 +9,8 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -19,8 +19,8 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -108,7 +108,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
@@ -142,7 +142,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

.github/workflows/test-external-provider-module.yml
@@ -12,12 +12,13 @@ on:
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
       - 'tests/external/*'
       - '.github/workflows/test-external-provider-module.yml' # This workflow
 
 jobs:
   test-external-providers-from-module:
+    # This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
+    if: false
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -47,7 +48,7 @@ jobs:
 
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
 
       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
.github/workflows/test-external.yml
@@ -43,11 +43,11 @@ jobs:
 
       - name: Print distro dependencies
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
 
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
 
       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'

@@ -1,7 +1,7 @@
 include pyproject.toml
 include llama_stack/models/llama/llama3/tokenizer.model
 include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/distribution/*.sh
+include llama_stack.core/*.sh
 include llama_stack/cli/scripts/*.sh
 include llama_stack/templates/*/*.yaml
 include llama_stack/providers/tests/test_cases/inference/*.json

@@ -6,7 +6,6 @@
 [](https://discord.gg/llama-stack)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
-(one badge/image line removed here; its markup was not preserved in this text mirror)
 
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 

docs/_static/llama-stack-spec.html
@@ -15078,22 +15078,6 @@
             "DPOAlignmentConfig": {
                 "type": "object",
                 "properties": {
-                    "reward_scale": {
-                        "type": "number",
-                        "description": "Scaling factor for the reward signal"
-                    },
-                    "reward_clip": {
-                        "type": "number",
-                        "description": "Maximum absolute value for reward clipping"
-                    },
-                    "epsilon": {
-                        "type": "number",
-                        "description": "Small value added for numerical stability"
-                    },
-                    "gamma": {
-                        "type": "number",
-                        "description": "Discount factor for future rewards"
-                    },
                     "beta": {
                         "type": "number",
                         "description": "Temperature parameter for the DPO loss"
@@ -15106,10 +15090,6 @@
             },
             "additionalProperties": false,
             "required": [
-                "reward_scale",
-                "reward_clip",
-                "epsilon",
-                "gamma",
                 "beta",
                 "loss_type"
             ],
docs/_static/llama-stack-spec.yaml
@@ -11163,20 +11163,6 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-          description: Scaling factor for the reward signal
-        reward_clip:
-          type: number
-          description: >-
-            Maximum absolute value for reward clipping
-        epsilon:
-          type: number
-          description: >-
-            Small value added for numerical stability
-        gamma:
-          type: number
-          description: Discount factor for future rewards
         beta:
           type: number
           description: Temperature parameter for the DPO loss
@@ -11186,10 +11172,6 @@ components:
         description: The type of loss function to use for DPO
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
         - beta
         - loss_type
       title: DPOAlignmentConfig
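For readers of the spec diff: after these removals the DPOAlignmentConfig schema reduces to roughly the following. This is a sketch reconstructed only from the unchanged context lines above; the full `loss_type` definition (for example its allowed values) is not visible in these hunks.

```yaml
# Sketch of the resulting schema, reconstructed from the context lines above.
DPOAlignmentConfig:
  type: object
  properties:
    beta:
      type: number
      description: Temperature parameter for the DPO loss
    loss_type:
      description: The type of loss function to use for DPO   # full definition not shown in the hunk
  additionalProperties: false
  required:
    - beta
    - loss_type
  title: DPOAlignmentConfig
```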
|
@ -165,7 +165,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -275,7 +275,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -265,7 +265,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -3216,19 +3216,19 @@
|
||||||
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
||||||
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
||||||
"INFO:datasets:JAX version 0.4.33 available.\n",
|
"INFO:datasets:JAX version 0.4.33 available.\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:\n"
|
"INFO:llama_stack.core.stack:\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3448,7 +3448,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
]
|
]
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -661,7 +661,7 @@
|
||||||
"except ImportError:\n",
|
"except ImportError:\n",
|
||||||
" print(\"Not in Google Colab environment\")\n",
|
" print(\"Not in Google Colab environment\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -194,7 +194,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
|
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.
|
||||||
|
|
|
@ -17,7 +17,7 @@ import fire
|
||||||
import ruamel.yaml as yaml
|
import ruamel.yaml as yaml
|
||||||
|
|
||||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
||||||
from llama_stack.distribution.stack import LlamaStack # noqa: E402
|
from llama_stack.core.stack import LlamaStack # noqa: E402
|
||||||
|
|
||||||
from .pyopenapi.options import Options # noqa: E402
|
from .pyopenapi.options import Options # noqa: E402
|
||||||
from .pyopenapi.specification import Info, Server # noqa: E402
|
from .pyopenapi.specification import Info, Server # noqa: E402
|
||||||
|
|
|
@ -12,7 +12,7 @@ from typing import TextIO
|
||||||
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
||||||
|
|
||||||
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
||||||
from llama_stack.distribution.resolver import api_protocol_map
|
from llama_stack.core.resolver import api_protocol_map
|
||||||
|
|
||||||
from .generator import Generator
|
from .generator import Generator
|
||||||
from .options import Options
|
from .options import Options
|
||||||
|
|
|
@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
|
||||||
|
|
||||||
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
||||||
|
|
||||||
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
|
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|
|
@ -187,7 +187,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -355,7 +355,7 @@ server:
|
||||||
8. Run the server:
|
8. Run the server:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
|
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Test the API:
|
9. Test the API:
|
||||||
|
|
|
@ -103,5 +103,5 @@ llama stack run together
|
||||||
|
|
||||||
2. Start Streamlit UI
|
2. Start Streamlit UI
|
||||||
```bash
|
```bash
|
||||||
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
|
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
|
||||||
```
|
```
|
||||||
|
|
|
@ -174,7 +174,7 @@ spec:
|
||||||
- name: llama-stack
|
- name: llama-stack
|
||||||
image: localhost/llama-stack-run-k8s:latest
|
image: localhost/llama-stack-run-k8s:latest
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5000
|
- containerPort: 5000
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -59,7 +59,7 @@ Build a Llama stack container
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
|
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
|
||||||
be prompted to enter information interactively (default: None)
|
be prompted to enter information interactively (default: None)
|
||||||
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
||||||
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
||||||
|
|
|
@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||||
|
|
||||||
client = LlamaStackAsLibraryClient(
|
client = LlamaStackAsLibraryClient(
|
||||||
"starter",
|
"starter",
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
value: "${SAFETY_MODEL}"
|
value: "${SAFETY_MODEL}"
|
||||||
- name: TAVILY_SEARCH_API_KEY
|
- name: TAVILY_SEARCH_API_KEY
|
||||||
value: "${TAVILY_SEARCH_API_KEY}"
|
value: "${TAVILY_SEARCH_API_KEY}"
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8321
|
- containerPort: 8321
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -1,9 +1,4 @@
|
||||||
# External Providers Guide
|
# Creating External Providers
|
||||||
|
|
||||||
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
|
|
||||||
- Create and maintain your own providers independently
|
|
||||||
- Share providers with others without contributing to the main codebase
|
|
||||||
- Keep provider-specific code separate from the core Llama Stack code
|
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
|
@@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
 1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
 2. **Inline Providers**: Providers that run locally within the Llama Stack process
 
-## Known External Providers
-
-Here's a list of known external providers that you can use with Llama Stack:
-
-| Name | Description | API | Type | Repository |
-|------|-------------|-----|------|------------|
-| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
-| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
-| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
-| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
-
 ### Remote Provider Specification
 
 Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
 - `provider_data_validator`: Optional validator for provider data
 - `container_image`: Optional container image to use instead of pip packages
 
-## Required Implementation
+## Required Fields
 
-## All Providers
+### All Providers
 
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
 
@@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
 )
 ```
 
-### Remote Providers
+#### Remote Providers
 
 Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
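Since the hunk above only shows the tail of the guide's `get_provider_spec` example, here is a hedged, self-contained sketch of what such a function typically looks like for a remote inference provider; the package name, adapter type, and config class path are illustrative assumptions, not taken from this diff:

```python
# Sketch of a provider package's provider.get_provider_spec(); names below are assumptions.
from llama_stack.providers.datatypes import AdapterSpec, Api, ProviderSpec, remote_provider_spec


def get_provider_spec() -> ProviderSpec:
    return remote_provider_spec(
        api=Api.inference,
        adapter=AdapterSpec(
            adapter_type="custom_ollama",  # hypothetical adapter type
            pip_packages=["ollama", "aiohttp"],
            config_class="llama_stack_provider_ollama.config.OllamaImplConfig",  # hypothetical path
            module="llama_stack_provider_ollama",  # hypothetical package name
        ),
    )
```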
@@ -162,7 +146,7 @@ async def get_adapter_impl(
 return OllamaInferenceAdapter(config)
 ```
 
-### Inline Providers
+#### Inline Providers
 
 Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
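As a quick reference for the two entry points described above, here is a hedged sketch; the config and implementation classes are local stand-ins so the shape is clear, and the second argument name (`deps`) plus the `initialize()` hook are assumptions for illustration:

```python
# Sketch only: the factory functions an external provider package is expected to expose.
from typing import Any

from pydantic import BaseModel


class MyProviderConfig(BaseModel):  # stand-in for the provider's config class
    url: str = "http://localhost:11434"


class MyAdapter:  # stand-in for the provider's adapter/implementation class
    def __init__(self, config: MyProviderConfig):
        self.config = config

    async def initialize(self) -> None:  # assumed async setup hook
        pass


async def get_adapter_impl(config: MyProviderConfig, deps: dict[Any, Any]):
    # Remote providers: build and initialize the adapter from its config.
    impl = MyAdapter(config)
    await impl.initialize()
    return impl


async def get_provider_impl(config: MyProviderConfig, deps: dict[Any, Any]):
    # Inline providers: same shape, but the implementation runs inside the Llama Stack process.
    impl = MyAdapter(config)
    await impl.initialize()
    return impl
```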
@@ -189,7 +173,40 @@ Version: 0.1.0
 Location: /path/to/venv/lib/python3.10/site-packages
 ```
 
-## Example using `external_providers_dir`: Custom Ollama Provider
+## Best Practices
 
+1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
+
+2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
+
+3. **Dependencies**: Only include the minimum required dependencies in your provider package.
+
+4. **Documentation**: Include clear documentation in your provider package about:
+- Installation requirements
+- Configuration options
+- Usage examples
+- Any limitations or known issues
+
+5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
+You can refer to the [integration tests
+guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
+information. Execute the test for the Provider type you are developing.
+
+## Troubleshooting
+
+If your external provider isn't being loaded:
+
+1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
+1. Check that the `external_providers_dir` path is correct and accessible.
+2. Verify that the YAML files are properly formatted.
+3. Ensure all required Python packages are installed.
+4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
+information using `LLAMA_STACK_LOGGING=all=debug`.
+5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
+
+## Examples
+
+### Example using `external_providers_dir`: Custom Ollama Provider
+
 Here's a complete example of creating and using a custom Ollama provider:
 
@@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
 The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
 
 
-## Example using `module`: ramalama-stack
+### Example using `module`: ramalama-stack
 
 [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
 
@@ -266,35 +283,4 @@ additional_pip_packages:
 
 No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
 
 The provider will now be available in Llama Stack with the type `remote::ramalama`.
-
-## Best Practices
-
-1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
-
-2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
-
-3. **Dependencies**: Only include the minimum required dependencies in your provider package.
-
-4. **Documentation**: Include clear documentation in your provider package about:
-- Installation requirements
-- Configuration options
-- Usage examples
-- Any limitations or known issues
-
-5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
-You can refer to the [integration tests
-guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
-information. Execute the test for the Provider type you are developing.
-
-## Troubleshooting
-
-If your external provider isn't being loaded:
-
-1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
-1. Check that the `external_providers_dir` path is correct and accessible.
-2. Verify that the YAML files are properly formatted.
-3. Ensure all required Python packages are installed.
-4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
-information using `LLAMA_STACK_LOGGING=all=debug`.
-5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
10
docs/source/providers/external/external-providers-list.md
vendored
Normal file

@@ -0,0 +1,10 @@
+# Known External Providers
+
+Here's a list of known external providers that you can use with Llama Stack:
+
+| Name | Description | API | Type | Repository |
+|------|-------------|-----|------|------------|
+| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
+| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
+| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
+| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
13
docs/source/providers/external/index.md
vendored
Normal file

@@ -0,0 +1,13 @@
+# External Providers
+
+Llama Stack supports external providers that live outside of the main codebase. This allows you to:
+- Create and maintain your own providers independently
+- Share providers with others without contributing to the main codebase
+- Keep provider-specific code separate from the core Llama Stack code
+
+```{toctree}
+:maxdepth: 1
+
+external-providers-list
+external-providers-guide
+```
@@ -15,7 +15,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 ```{toctree}
 :maxdepth: 1
 
-external
+external/index
 openai
 inference/index
 agents/index
@@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
 | `weight_decay` | `<class 'float'>` | No | 0.01 | |
 | `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
 | `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
+| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
+| `use_reference_model` | `<class 'bool'>` | No | True | |
+| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
+| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |
 
 ## Sample Configuration
 
@@ -66,7 +66,7 @@
 "from pydantic import BaseModel\n",
 "from termcolor import cprint\n",
 "\n",
-"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
+"from llama_stack.core.datatypes import RemoteProviderConfig\n",
 "from llama_stack.apis.safety import Safety\n",
 "from llama_stack_client import LlamaStackClient\n",
 "\n",
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.distribution.library_client import ( # noqa: F401
+from llama_stack.core.library_client import ( # noqa: F401
 AsyncLlamaStackAsLibraryClient,
 LlamaStackAsLibraryClient,
 )
@@ -193,18 +193,10 @@ class DPOLossType(Enum):
 class DPOAlignmentConfig(BaseModel):
 """Configuration for Direct Preference Optimization (DPO) alignment.
 
-:param reward_scale: Scaling factor for the reward signal
-:param reward_clip: Maximum absolute value for reward clipping
-:param epsilon: Small value added for numerical stability
-:param gamma: Discount factor for future rewards
 :param beta: Temperature parameter for the DPO loss
 :param loss_type: The type of loss function to use for DPO
 """
 
-reward_scale: float
-reward_clip: float
-epsilon: float
-gamma: float
 beta: float
 loss_type: DPOLossType = DPOLossType.sigmoid
 
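To make the effect of this hunk concrete, a self-contained sketch of the slimmed-down model, re-declared locally rather than imported from llama_stack; the enum string values mirror the `dpo_loss_type` literals documented earlier and are otherwise an assumption:

```python
# Stand-in mirror of DPOAlignmentConfig after this change: only the fields DPO actually uses remain.
from enum import Enum

from pydantic import BaseModel


class DPOLossType(Enum):
    sigmoid = "sigmoid"
    hinge = "hinge"
    ipo = "ipo"
    kto_pair = "kto_pair"


class DPOAlignmentConfig(BaseModel):
    beta: float
    loss_type: DPOLossType = DPOLossType.sigmoid


# reward_scale / reward_clip / epsilon / gamma no longer need to be supplied.
config = DPOAlignmentConfig(beta=0.1)
print(config.loss_type)  # DPOLossType.sigmoid
```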
@@ -323,7 +323,7 @@ def _hf_download(
 from huggingface_hub import snapshot_download
 from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 repo_id = model.huggingface_repo
 if repo_id is None:
@@ -361,7 +361,7 @@ def _meta_download(
 info: "LlamaDownloadInfo",
 max_concurrent_downloads: int,
 ):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 output_dir = Path(model_local_dir(model.descriptor()))
 os.makedirs(output_dir, exist_ok=True)
@@ -403,7 +403,7 @@ class Manifest(BaseModel):
 
 
 def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 with open(manifest_file) as f:
 d = json.load(f)
@@ -11,7 +11,7 @@ from pathlib import Path
 
 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import all_registered_models
 
 
@@ -9,7 +9,7 @@ import os
 import shutil
 
 from llama_stack.cli.subcommand import Subcommand
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import resolve_model
 
 
@@ -23,27 +23,27 @@ from termcolor import colored, cprint
 
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.build import (
+from llama_stack.core.build import (
 SERVER_DEPENDENCIES,
 build_image,
 get_provider_dependencies,
 )
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 BuildConfig,
 BuildProvider,
 DistributionSpec,
 Provider,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.resolver import InvalidProviderError
+from llama_stack.core.resolver import InvalidProviderError
-from llama_stack.distribution.stack import replace_env_vars
+from llama_stack.core.stack import replace_env_vars
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, run_command
+from llama_stack.core.utils.exec import formulate_run_args, run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
@@ -27,7 +27,7 @@ class StackBuild(Subcommand):
 "--config",
 type=str,
 default=None,
-help="Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
+help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
 )
 
 self.parser.add_argument(
@@ -26,7 +26,7 @@ class StackListApis(Subcommand):
 
 def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.distribution import stack_apis
+from llama_stack.core.distribution import stack_apis
 
 # eventually, this should query a registry at llama.meta.com/llamastack/distributions
 headers = [
@@ -23,7 +23,7 @@ class StackListProviders(Subcommand):
 
 @property
 def providable_apis(self):
-from llama_stack.distribution.distribution import providable_apis
+from llama_stack.core.distribution import providable_apis
 
 return [api.value for api in providable_apis()]
 
@@ -38,7 +38,7 @@ class StackListProviders(Subcommand):
 
 def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.distribution import Api, get_provider_registry
+from llama_stack.core.distribution import Api, get_provider_registry
 
 all_providers = get_provider_registry()
 if args.api:
@@ -85,8 +85,8 @@ class StackRun(Subcommand):
 def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
 import yaml
 
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.utils.exec import formulate_run_args, run_command
+from llama_stack.core.utils.exec import formulate_run_args, run_command
 
 if args.enable_ui:
 self._start_ui_development_server(args.port)
@@ -94,7 +94,7 @@ class StackRun(Subcommand):
 
 if args.config:
 try:
-from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
 
 config_file = resolve_config_or_template(args.config, Mode.RUN)
 except ValueError as e:
|
||||||
# using the current environment packages.
|
# using the current environment packages.
|
||||||
if not image_type and not image_name:
|
if not image_type and not image_name:
|
||||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||||
from llama_stack.distribution.server.server import main as server_main
|
from llama_stack.core.server.server import main as server_main
|
||||||
|
|
||||||
# Build the server args from the current args passed to the CLI
|
# Build the server args from the current args passed to the CLI
|
||||||
server_args = argparse.Namespace()
|
server_args = argparse.Namespace()
|
||||||
|
|
|
@@ -107,7 +107,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
 
 
 def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 console = Console()
 model_dir = Path(model_local_dir(args.model_id))
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .conditions import (
 Condition,
@@ -12,11 +12,11 @@ from pathlib import Path
 from pydantic import BaseModel
 from termcolor import cprint
 
-from llama_stack.distribution.datatypes import BuildConfig
+from llama_stack.core.datatypes import BuildConfig
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.utils.exec import run_command
+from llama_stack.core.utils.exec import run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 from llama_stack.templates.template import DistributionTemplate
 
@@ -122,7 +122,7 @@ def build_image(
 normal_deps.extend(api_spec.pip_packages)
 
 if build_config.image_type == LlamaStackImageType.CONTAINER.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
 args = [
 script,
 "--template-or-config",
@@ -139,7 +139,7 @@ def build_image(
 if run_config is not None:
 args.extend(["--run-config", run_config])
 elif build_config.image_type == LlamaStackImageType.CONDA.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_conda_env.sh")
 args = [
 script,
 "--env-name",
@@ -150,7 +150,7 @@ def build_image(
 " ".join(normal_deps),
 ]
 elif build_config.image_type == LlamaStackImageType.VENV.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
 args = [
 script,
 "--env-name",
@@ -327,12 +327,12 @@ EOF
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
 EOF
 # If a template is provided (not a yaml file), we use the --template flag
 elif [[ "$template_or_config" != *.yaml ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
 EOF
 fi
 
@@ -7,20 +7,20 @@ import logging
 import textwrap
 from typing import Any
 
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 LLAMA_STACK_RUN_CONFIG_VERSION,
 DistributionSpec,
 Provider,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import (
+from llama_stack.core.distribution import (
 builtin_automatically_routed_apis,
 get_provider_registry,
 )
-from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
-from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
+from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 logger = logging.getLogger(__name__)
@@ -24,7 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
 from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
@@ -12,8 +12,8 @@ from typing import Any
 import yaml
 from pydantic import BaseModel
 
-from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
 AdapterSpec,
@@ -8,7 +8,7 @@
 import yaml
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.distribution.datatypes import BuildConfig, StackRunConfig
+from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="core")
@@ -15,9 +15,9 @@ from llama_stack.apis.inspect import (
 RouteInfo,
 VersionInfo,
 )
-from llama_stack.distribution.datatypes import StackRunConfig
+from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.server.routes import get_all_api_routes
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.providers.datatypes import HealthStatus
 
 
@@ -31,23 +31,23 @@ from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
 
-from llama_stack.distribution.build import print_pip_install_help
+from llama_stack.core.build import print_pip_install_help
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
+from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
-from llama_stack.distribution.request_headers import (
+from llama_stack.core.request_headers import (
 PROVIDER_DATA_VAR,
 request_provider_data_context,
 )
-from llama_stack.distribution.resolver import ProviderRegistry
+from llama_stack.core.resolver import ProviderRegistry
-from llama_stack.distribution.server.routes import RouteImpls, find_matching_route, initialize_route_impls
+from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
-from llama_stack.distribution.stack import (
+from llama_stack.core.stack import (
 construct_stack,
 get_stack_run_config_from_template,
 replace_env_vars,
 )
-from llama_stack.distribution.utils.config import redact_sensitive_fields
+from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.distribution.utils.exec import in_notebook
+from llama_stack.core.utils.exec import in_notebook
 from llama_stack.providers.utils.telemetry.tracing import (
 CURRENT_TRACE_CONTEXT,
 end_trace,
@@ -10,7 +10,7 @@ import logging
 from contextlib import AbstractContextManager
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .utils.dynamic import instantiate_class_type
 
@@ -27,18 +27,18 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.client import get_client_impl
+from llama_stack.core.client import get_client_impl
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AccessRule,
 AutoRoutedProviderSpec,
 Provider,
 RoutingTableProviderSpec,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
+from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
 Api,
@@ -183,7 +183,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
 spec=RoutingTableProviderSpec(
 api=info.routing_table_api,
 router_api=info.router_api,
-module="llama_stack.distribution.routers",
+module="llama_stack.core.routers",
 api_dependencies=[],
 deps__=[f"inner-{info.router_api.value}"],
 ),
@@ -197,7 +197,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
 config={},
 spec=AutoRoutedProviderSpec(
 api=info.router_api,
-module="llama_stack.distribution.routers",
+module="llama_stack.core.routers",
 routing_table_api=info.routing_table_api,
 api_dependencies=[info.routing_table_api],
 # Add telemetry as an optional dependency to all auto-routed providers
@@ -6,9 +6,9 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
+from llama_stack.core.datatypes import AccessRule, RoutedProtocol
-from llama_stack.distribution.stack import StackRunConfig
+from llama_stack.core.stack import StackRunConfig
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 
@@ -7,7 +7,7 @@
 from typing import Any
 
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
@@ -10,16 +10,16 @@ from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.apis.models import Model
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
+from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.distribution.access_control.datatypes import Action
+from llama_stack.core.access_control.datatypes import Action
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AccessRule,
 RoutableObject,
 RoutableObjectWithProvider,
 RoutedProtocol,
 )
-from llama_stack.distribution.request_headers import get_authenticated_user
+from llama_stack.core.request_headers import get_authenticated_user
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api, RoutingTable
 
@@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
 URIDataSource,
 )
 from llama_stack.apis.resource import ResourceType
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 DatasetWithOwner,
 )
 from llama_stack.log import get_logger
@@ -9,7 +9,7 @@ from typing import Any
 
 from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ModelWithOwner,
 RegistryEntrySource,
 )
@@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import (
 ScoringFnParams,
 ScoringFunctions,
 )
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ScoringFnWithOwner,
 )
 from llama_stack.log import get_logger
@@ -8,7 +8,7 @@ from typing import Any
 
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ShieldWithOwner,
 )
 from llama_stack.log import get_logger
@@ -8,7 +8,7 @@ from typing import Any
 
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
-from llama_stack.distribution.datatypes import ToolGroupWithOwner
+from llama_stack.core.datatypes import ToolGroupWithOwner
 from llama_stack.log import get_logger
 
 from .common import CommonRoutingTableImpl
@@ -23,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
 VectorStoreObject,
 VectorStoreSearchResponsePage,
 )
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 VectorDBWithOwner,
 )
 from llama_stack.log import get_logger
@@ -84,8 +84,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
 
 async def unregister_vector_db(self, vector_db_id: str) -> None:
 existing_vector_db = await self.get_vector_db(vector_db_id)
-if existing_vector_db is None:
-raise VectorStoreNotFoundError(vector_db_id)
 await self.unregister_object(existing_vector_db)
 
 async def openai_retrieve_vector_store(
@@ -9,10 +9,10 @@ import json
 import httpx
 from aiohttp import hdrs
 
-from llama_stack.distribution.datatypes import AuthenticationConfig, User
+from llama_stack.core.datatypes import AuthenticationConfig, User
-from llama_stack.distribution.request_headers import user_from_scope
+from llama_stack.core.request_headers import user_from_scope
-from llama_stack.distribution.server.auth_providers import create_auth_provider
+from llama_stack.core.server.auth_providers import create_auth_provider
-from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls
+from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
 from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="auth")
@@ -14,7 +14,7 @@ import httpx
 from jose import jwt
 from pydantic import BaseModel, Field
 
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AuthenticationConfig,
 CustomAuthConfig,
 GitHubTokenAuthConfig,
@@ -15,7 +15,7 @@ from starlette.routing import Route
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.apis.version import LLAMA_STACK_API_VERSION
-from llama_stack.distribution.resolver import api_protocol_map
+from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
 EndpointFunc = Callable[..., Any]
@@ -33,35 +33,35 @@ from pydantic import BaseModel, ValidationError
 
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.cli.utils import add_config_template_args, get_config_from_args
-from llama_stack.distribution.access_control.access_control import AccessDeniedError
+from llama_stack.core.access_control.access_control import AccessDeniedError
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AuthenticationRequiredError,
 LoggingConfig,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
+from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.distribution.external import ExternalApiSpec, load_external_apis
+from llama_stack.core.external import ExternalApiSpec, load_external_apis
-from llama_stack.distribution.request_headers import (
+from llama_stack.core.request_headers import (
 PROVIDER_DATA_VAR,
 request_provider_data_context,
 user_from_scope,
 )
-from llama_stack.distribution.resolver import InvalidProviderError
+from llama_stack.core.resolver import InvalidProviderError
-from llama_stack.distribution.server.routes import (
+from llama_stack.core.server.routes import (
 find_matching_route,
 get_all_api_routes,
 initialize_route_impls,
 )
-from llama_stack.distribution.stack import (
+from llama_stack.core.stack import (
 cast_image_name_to_string,
 construct_stack,
 replace_env_vars,
 shutdown_stack,
 validate_env_pair,
 )
-from llama_stack.distribution.utils.config import redact_sensitive_fields
+from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
-from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
@@ -34,14 +34,14 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.datatypes import Provider, StackRunConfig
+from llama_stack.core.datatypes import Provider, StackRunConfig
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.distribution.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.distribution.store.registry import create_dist_registry
+from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 
@@ -122,7 +122,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
 yaml_config_arg=""
 fi
 
-$PYTHON_BINARY -m llama_stack.distribution.server.server \
+$PYTHON_BINARY -m llama_stack.core.server.server \
 $yaml_config_arg \
 --port "$port" \
 $env_vars \
@@ -10,8 +10,8 @@ from typing import Protocol
 
 import pydantic
 
-from llama_stack.distribution.datatypes import RoutableObjectWithProvider
+from llama_stack.core.datatypes import RoutableObjectWithProvider
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
@@ -36,7 +36,7 @@ llama-stack-client benchmarks register \
 3. Start Streamlit UI
 
 ```bash
-uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
+uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
 ```
 
 ## Environment Variables