forked from phoenix-oss/llama-stack-mirror

commit 1c1a54d5d4 (parent 9623d5d230)
add kvant

20 changed files with 449 additions and 0 deletions
.github/workflows/Dockerfile (vendored, new file)
@@ -0,0 +1 @@
FROM distribution-kvant:dev
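This one-line Dockerfile presupposes a local image named distribution-kvant:dev, which the ci.yaml workflow below produces with `llama stack build` before using this directory as the Docker build context. A minimal local reproduction might look like the following sketch; the final tag is a placeholder for git.kvant.cloud/${{github.repository}}.

```sh
# Sketch: reproduce the CI image build locally.
# Assumes Docker and pipx are available; <owner>/<repo> is a placeholder.
pipx install uv
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. \
  uvx --from . llama stack build --template kvant --image-type container
# The build above presumably yields distribution-kvant:dev locally,
# which the one-line Dockerfile then wraps:
docker build -t git.kvant.cloud/<owner>/<repo>:dev .github/workflows
```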
.github/workflows/changelog.yml (vendored, deleted)
@@ -1,29 +0,0 @@
name: Update Changelog

on:
  release:
    types: [published, unpublished, created, edited, deleted, released]

permissions:
  contents: read

jobs:
  generate_changelog:
    name: Generate changelog
    permissions:
      contents: write  # for peter-evans/create-pull-request to create branch
      pull-requests: write  # for peter-evans/create-pull-request to create a PR
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: main
          fetch-depth: 0
      - run: |
          python ./scripts/gen-changelog.py
      - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
        with:
          title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
          commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
          branch: create-pull-request/changelog
          signoff: true
.github/workflows/ci.yaml (vendored, new file)
@@ -0,0 +1,78 @@
name: Build and Push container
run-name: Build and Push container
on:
  workflow_dispatch:
  #schedule:
  #  - cron: "0 10 * * *"
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  pull_request:
    branches:
      - 'main'
env:
  IMAGE: git.kvant.cloud/${{github.repository}}
jobs:
  build_concierge_backend:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set current time
        uses: https://github.com/gerred/actions/current-time@master
        id: current_time

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to git.kvant.cloud registry
        uses: docker/login-action@v3
        with:
          registry: git.kvant.cloud
          username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }}
          password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          # list of Docker images to use as base name for tags
          images: |
            ${{env.IMAGE}}
          # generate Docker tags based on the following events/attributes
          tags: |
            type=schedule
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}

      - name: Install uv
        run: pipx install uv

      - name: Build
        env:
          USE_COPY_NOT_MOUNT: true
          LLAMA_STACK_DIR: .
        run: uvx --from . llama stack build --template kvant --image-type container

      - name: Build and push to gitea registry
        uses: docker/build-push-action@v6
        with:
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          context: .github/workflows
          provenance: mode=max
          sbom: true
          build-args: |
            BUILD_DATE=${{ steps.current_time.outputs.time }}
          cache-from: |
            type=registry,ref=${{ env.IMAGE }}:buildcache
            type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }}
            type=registry,ref=${{ env.IMAGE }}:main
          cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true
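As a reading aid for the metadata-action block above, the configured tag rules map CI events to image tags roughly as follows; the ref names and repository path are assumed examples, not values from this commit.

```sh
# Event                 -> tag under git.kvant.cloud/<owner>/<repo>
# push to branch main   -> :main      (type=ref,event=branch)
# pull request #42      -> :pr-42     (type=ref,event=pr; push is disabled for PRs)
# push of tag v1.2.3    -> :1.2.3     (type=semver,pattern={{version}})
# scheduled run         -> :nightly   (type=schedule, action default)
```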
.github/workflows/gha_workflow_llama_stack_tests.yml (vendored, deleted)
@@ -1,355 +0,0 @@
name: "Run Llama-stack Tests"

on:
  #### Temporarily disable PR runs until tests run as intended within mainline.
  #TODO Add this back.
  #pull_request_target:
  #  types: ["opened"]
  #  branches:
  #    - 'main'
  #  paths:
  #    - 'llama_stack/**/*.py'
  #    - 'tests/**/*.py'

  workflow_dispatch:
    inputs:
      runner:
        description: 'GHA Runner Scale Set label to run workflow on.'
        required: true
        default: "llama-stack-gha-runner-gpu"

      checkout_reference:
        description: "The branch, tag, or SHA to checkout"
        required: true
        default: "main"

      debug:
        description: 'Run debugging steps?'
        required: false
        default: "true"

      sleep_time:
        description: '[DEBUG] sleep time for debugging'
        required: true
        default: "0"

      provider_id:
        description: 'ID of your provider'
        required: true
        default: "meta_reference"

      model_id:
        description: 'Shorthand name for target model ID (llama_3b or llama_8b)'
        required: true
        default: "llama_3b"

      model_override_3b:
        description: 'Specify shorthand model for <llama_3b> '
        required: false
        default: "Llama3.2-3B-Instruct"

      model_override_8b:
        description: 'Specify shorthand model for <llama_8b> '
        required: false
        default: "Llama3.1-8B-Instruct"

env:
  # ID used for each test's provider config
  PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}"

  # Path to model checkpoints within EFS volume
  MODEL_CHECKPOINT_DIR: "/data/llama"

  # Path to directory to run tests from
  TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests"

  # Keep track of a list of model IDs that are valid to use within pytest fixture marks
  AVAILABLE_MODEL_IDs: "llama_3b llama_8b"

  # Shorthand name for model ID, used in pytest fixture marks
  MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}"

  # Override the `llama_3b` / `llama_8b' models, else use the default.
  LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}"
  LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}"

  # Defines which directories in TESTS_PATH to exclude from the test loop
  EXCLUDED_DIRS: "__pycache__"

  # Defines the output xml reports generated after a test is run
  REPORTS_GEN: ""

jobs:
  execute_workflow:
    name: Execute workload on Self-Hosted GPU k8s runner
    permissions:
      pull-requests: write
    defaults:
      run:
        shell: bash
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }}
    if: always()
    steps:

      ##############################
      #### INITIAL DEBUG CHECKS ####
      ##############################
      - name: "[DEBUG] Check content of the EFS mount"
        id: debug_efs_volume
        continue-on-error: true
        if: inputs.debug == 'true'
        run: |
          echo "========= Content of the EFS mount ============="
          ls -la ${{ env.MODEL_CHECKPOINT_DIR }}

      - name: "[DEBUG] Get runner container OS information"
        id: debug_os_info
        if: ${{ inputs.debug == 'true' }}
        run: |
          cat /etc/os-release

      - name: "[DEBUG] Print environment variables"
        id: debug_env_vars
        if: ${{ inputs.debug == 'true' }}
        run: |
          echo "PROVIDER_ID = ${PROVIDER_ID}"
          echo "MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}"
          echo "AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}"
          echo "MODEL_ID = ${MODEL_ID}"
          echo "LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}"
          echo "LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}"
          echo "EXCLUDED_DIRS = ${EXCLUDED_DIRS}"
          echo "REPORTS_GEN = ${REPORTS_GEN}"

      ############################
      #### MODEL INPUT CHECKS ####
      ############################

      - name: "Check if env.model_id is valid"
        id: check_model_id
        run: |
          if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then
            echo "Model ID '${MODEL_ID}' is valid."
          else
            echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow."
            exit 1
          fi

      #######################
      #### CODE CHECKOUT ####
      #######################
      - name: "Checkout 'meta-llama/llama-stack' repository"
        id: checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ inputs.branch }}

      - name: "[DEBUG] Content of the repository after checkout"
        id: debug_content_after_checkout
        if: ${{ inputs.debug == 'true' }}
        run: |
          ls -la ${GITHUB_WORKSPACE}

      ##########################################################
      ####             OPTIONAL SLEEP DEBUG                 ####
      #                                                        #
      # Use to "exec" into the test k8s POD and run tests      #
      # manually to identify what dependencies are being used. #
      #                                                        #
      ##########################################################
      - name: "[DEBUG] sleep"
        id: debug_sleep
        if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }}
        run: |
          sleep ${{ inputs.sleep_time }}

      ############################
      #### UPDATE SYSTEM PATH ####
      ############################
      - name: "Update path: execute"
        id: path_update_exec
        run: |
          # .local/bin is needed for certain libraries installed below to be recognized
          # when calling their executable to install sub-dependencies
          mkdir -p ${HOME}/.local/bin
          echo "${HOME}/.local/bin" >> "$GITHUB_PATH"

      #####################################
      #### UPDATE CHECKPOINT DIRECTORY ####
      #####################################
      - name: "Update checkpoint directory"
        id: checkpoint_update
        run: |
          echo "Checkpoint directory: ${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE"
          if [ "${MODEL_ID}" = "llama_3b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" >> "$GITHUB_ENV"
          elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV"
          else
            echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow."
            exit 1
          fi

      - name: "[DEBUG] Checkpoint update check"
        id: debug_checkpoint_update
        if: ${{ inputs.debug == 'true' }}
        run: |
          echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}"

      ##################################
      #### DEPENDENCY INSTALLATIONS ####
      ##################################
      - name: "Installing 'apt' required packages"
        id: install_apt
        run: |
          echo "[STEP] Installing 'apt' required packages"
          sudo apt update -y
          sudo apt install -y python3 python3-pip npm wget

      - name: "Installing packages with 'curl'"
        id: install_curl
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: "Installing packages with 'wget'"
        id: install_wget
        run: |
          wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
          chmod +x Miniconda3-latest-Linux-x86_64.sh
          ./Miniconda3-latest-Linux-x86_64.sh -b install -c pytorch -c nvidia faiss-gpu=1.9.0
          # Add miniconda3 bin to system path
          echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH"

      - name: "Installing packages with 'npm'"
        id: install_npm_generic
        run: |
          sudo npm install -g junit-merge

      - name: "Installing pip dependencies"
        id: install_pip_generic
        run: |
          echo "[STEP] Installing 'llama-stack' models"
          pip install -U pip setuptools
          pip install -r requirements.txt
          pip install -e .
          pip install -U \
            torch torchvision \
            pytest pytest_asyncio \
            fairscale lm-format-enforcer \
            zmq chardet pypdf \
            pandas sentence_transformers together \
            aiosqlite
      - name: "Installing packages with conda"
        id: install_conda_generic
        run: |
          conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0

      #############################################################
      #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH ####
      #############################################################
      - name: "Run Tests: Loop"
        id: run_tests_loop
        working-directory: "${{ github.workspace }}"
        run: |
          pattern=""
          for dir in llama_stack/providers/tests/*; do
            if [ -d "$dir" ]; then
              dir_name=$(basename "$dir")
              if [[ ! " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then
                for file in "$dir"/test_*.py; do
                  test_name=$(basename "$file")
                  new_file="result-${dir_name}-${test_name}.xml"
                  if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \
                    --junitxml="${{ github.workspace }}/${new_file}"; then
                    echo "Ran test: ${test_name}"
                  else
                    echo "Did NOT run test: ${test_name}"
                  fi
                  pattern+="${new_file} "
                done
              fi
            fi
          done
          echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV"

      - name: "Test Summary: Merge"
        id: test_summary_merge
        working-directory: "${{ github.workspace }}"
        run: |
          echo "Merging the following test result files: ${REPORTS_GEN}"
          # Defaults to merging them into 'merged-test-results.xml'
          junit-merge ${{ env.REPORTS_GEN }}

      ############################################
      #### AUTOMATIC TESTING ON PULL REQUESTS ####
      ############################################

      #### Run tests ####

      - name: "PR - Run Tests"
        id: pr_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'pull_request_target'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}"
          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "PR - Test Summary"
        id: pr_test_summary_create
        if: github.event_name == 'pull_request_target'
        uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"
          output: test-summary.md

      - name: "PR - Upload Test Summary"
        id: pr_test_summary_upload
        if: github.event_name == 'pull_request_target'
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: test-summary
          path: test-summary.md

      #### Update PR request ####

      - name: "PR - Update comment"
        id: pr_update_comment
        if: github.event_name == 'pull_request_target'
        uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1
        with:
          filePath: test-summary.md

      ########################
      #### MANUAL TESTING ####
      ########################

      #### Run tests ####

      - name: "Manual - Run Tests: Prep"
        id: manual_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}"

          #TODO Use this when collection errors are resolved
          # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml"

          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "Manual - Test Summary"
        id: manual_test_summary
        if: always() && github.event_name == 'workflow_dispatch'
        uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"
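To make the test loop concrete: with the default inputs, a single iteration expands to roughly the command below. This is a sketch; test_text_inference.py stands in for whichever test_*.py file the loop is visiting.

```sh
# One iteration of "Run Tests: Loop" with the defaults
# PROVIDER_ID=meta_reference and MODEL_ID=llama_3b.
# The inference/test_text_inference.py path is illustrative only.
torchrun "$(which pytest)" -s -v \
  "$GITHUB_WORKSPACE/llama_stack/providers/tests/inference/test_text_inference.py" \
  -m "meta_reference and llama_3b" \
  --junitxml="$GITHUB_WORKSPACE/result-inference-test_text_inference.py.xml"
```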
.github/workflows/install-script-ci.yml (vendored, deleted)
@@ -1,26 +0,0 @@
name: Installer CI

on:
  pull_request:
    paths:
      - 'install.sh'
  push:
    paths:
      - 'install.sh'
  schedule:
    - cron: '0 2 * * *' # every day at 02:00 UTC

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
      - name: Run ShellCheck on install.sh
        run: shellcheck install.sh
  smoke-test:
    needs: lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
      - name: Run installer end-to-end
        run: ./install.sh
.github/workflows/integration-auth-tests.yml (vendored, deleted)
@@ -1,132 +0,0 @@
name: Integration Auth Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'distributions/**'
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-auth-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        auth-provider: [oauth2_token]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build Llama Stack
        run: |
          llama stack build --template ollama --image-type venv

      - name: Install minikube
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19

      - name: Start minikube
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          minikube start
          kubectl get pods -A

      - name: Configure Kube Auth
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          kubectl create namespace llama-stack
          kubectl create serviceaccount llama-stack-auth -n llama-stack
          kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
          kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
          cat <<EOF | kubectl apply -f -
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRole
          metadata:
            name: allow-anonymous-openid
          rules:
          - nonResourceURLs: ["/openid/v1/jwks"]
            verbs: ["get"]
          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRoleBinding
          metadata:
            name: allow-anonymous-openid
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: allow-anonymous-openid
          subjects:
          - kind: User
            name: system:anonymous
            apiGroup: rbac.authorization.k8s.io
          EOF

      - name: Set Kubernetes Config
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          echo "KUBERNETES_API_SERVER_URL=$(kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri)" >> $GITHUB_ENV
          echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV
          echo "KUBERNETES_ISSUER=$(kubectl get --raw /.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV
          echo "KUBERNETES_AUDIENCE=$(kubectl create token llama-stack-auth -n llama-stack --duration=1h | cut -d. -f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV

      - name: Set Kube Auth Config and run server
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          run_dir=$(mktemp -d)
          cat <<'EOF' > $run_dir/run.yaml
          version: '2'
          image_name: kube
          apis: []
          providers: {}
          server:
            port: 8321
          EOF
          yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml
          yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml
          yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml
          cat $run_dir/run.yaml

          nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              if grep -q "Enabling authentication with provider: ${{ matrix.auth-provider }}" server.log; then
                echo "Llama Stack server is configured to use ${{ matrix.auth-provider }} auth"
                exit 0
              else
                echo "Llama Stack server is not configured to use ${{ matrix.auth-provider }} auth"
                cat server.log
                exit 1
              fi
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Test auth
        run: |
          curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers|jq
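The three yq edits in "Set Kube Auth Config and run server" grow the heredoc stub into a config shaped like the sketch below; the angle-bracket values stand for the KUBERNETES_* variables resolved at runtime.

```sh
cat "$run_dir/run.yaml"
# version: '2'
# image_name: kube
# apis: []
# providers: {}
# server:
#   port: 8321
#   auth:
#     provider_type: oauth2_token
#     config:
#       tls_cafile: <KUBERNETES_CA_CERT_PATH>
#       issuer: <KUBERNETES_ISSUER>
#       audience: <KUBERNETES_AUDIENCE>
#       jwks:
#         uri: <KUBERNETES_API_SERVER_URL>
```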
.github/workflows/integration-tests.yml (vendored, deleted)
@@ -1,116 +0,0 @@
name: Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Listing tests manually since some of them currently fail
        # TODO: generate matrix list from tests/integration when fixed
        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime]
        client-type: [library, http]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Setup ollama
        uses: ./.github/actions/setup-ollama

      - name: Build Llama Stack
        run: |
          llama stack build --template ollama --image-type venv

      - name: Start Llama Stack server in background
        if: matrix.client-type == 'http'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &

      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Verify Ollama status is OK
        if: matrix.client-type == 'http'
        run: |
          echo "Verifying Ollama status..."
          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
          echo "Ollama status: $ollama_status"
          if [ "$ollama_status" != "OK" ]; then
            echo "Ollama health check failed"
            exit 1
          fi

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
          else
            stack_config="http://localhost:8321"
          fi
          uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
            --embedding-model=all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Write ollama logs to file
        if: ${{ always() }}
        run: |
          sudo journalctl -u ollama.service > ollama.log

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}
          path: |
            *.log
          retention-days: 1
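The readiness and provider checks above reduce to two HTTP probes that can also be run by hand against a local server (a sketch; assumes the server listens on the default port 8321):

```sh
# Server liveness, as polled by "Wait for Llama Stack server to be ready":
curl -s http://localhost:8321/v1/health | grep -q "OK" && echo "server up"
# Provider health, as read by "Verify Ollama status is OK":
curl -s -L http://127.0.0.1:8321/v1/providers/ollama | jq --raw-output .health.status
```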
.github/workflows/pre-commit.yml (vendored, deleted)
@@ -1,45 +0,0 @@
name: Pre-commit

on:
  pull_request:
  push:
    branches: [main]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  pre-commit:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: '3.11'
          cache: pip
          cache-dependency-path: |
            **/requirements*.txt
            .pre-commit-config.yaml

      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
        env:
          SKIP: no-commit-to-branch
          RUFF_OUTPUT_FORMAT: github

      - name: Verify if there are any diff files after pre-commit
        run: |
          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

      - name: Verify if there are any new files after pre-commit
        run: |
          unstaged_files=$(git ls-files --others --exclude-standard)
          if [ -n "$unstaged_files" ]; then
            echo "There are uncommitted new files, run pre-commit locally and commit again"
            echo "$unstaged_files"
            exit 1
          fi
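When either verification step fails, the remedy it points contributors to is the usual local pre-commit cycle, sketched here (assumes pre-commit is installed, e.g. via pipx; the commit message is illustrative):

```sh
pipx install pre-commit        # one-time setup (assumption: pipx is available)
pre-commit run --all-files     # apply the same hooks the workflow runs
git add -A
git commit -m "style: apply pre-commit fixes"   # illustrative message
```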
.github/workflows/providers-build.yml (vendored, deleted)
@@ -1,147 +0,0 @@
name: Test Llama Stack Build

on:
  push:
    branches:
      - main
    paths:
      - 'llama_stack/cli/stack/build.py'
      - 'llama_stack/cli/stack/_build.py'
      - 'llama_stack/distribution/build.*'
      - 'llama_stack/distribution/*.sh'
      - '.github/workflows/providers-build.yml'
  pull_request:
    paths:
      - 'llama_stack/cli/stack/build.py'
      - 'llama_stack/cli/stack/_build.py'
      - 'llama_stack/distribution/build.*'
      - 'llama_stack/distribution/*.sh'
      - '.github/workflows/providers-build.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      templates: ${{ steps.set-matrix.outputs.templates }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Generate Template List
        id: set-matrix
        run: |
          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
          echo "templates=$templates" >> "$GITHUB_OUTPUT"

  build:
    needs: generate-matrix
    runs-on: ubuntu-latest
    strategy:
      matrix:
        template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
        image-type: [venv, container]
      fail-fast: false # We want to run all jobs even if some fail

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Print build dependencies
        run: |
          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only

      - name: Run Llama Stack Build
        run: |
          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test

      - name: Print dependencies in the image
        if: matrix.image-type == 'venv'
        run: |
          uv pip list

  build-single-provider:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build a single provider
        run: |
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama

  build-custom-container-distribution:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build a single provider
        run: |
          yq -i '.image_type = "container"' llama_stack/templates/starter/build.yaml
          yq -i '.image_name = "test"' llama_stack/templates/starter/build.yaml
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/starter/build.yaml

      - name: Inspect the container image entrypoint
        run: |
          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
          entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
          echo "Entrypoint: $entrypoint"
          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
            echo "Entrypoint is not correct"
            exit 1
          fi

  build-ubi9-container-distribution:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Pin template to UBI9 base
        run: |
          yq -i '
            .image_type = "container" |
            .image_name = "ubi9-test" |
            .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
          ' llama_stack/templates/starter/build.yaml

      - name: Build dev container (UBI9)
        env:
          USE_COPY_NOT_MOUNT: "true"
          LLAMA_STACK_DIR: "."
        run: |
          uv run llama stack build --config llama_stack/templates/starter/build.yaml

      - name: Inspect UBI9 image
        run: |
          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
          entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
          echo "Entrypoint: $entrypoint"
          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
            echo "Entrypoint is not correct"
            exit 1
          fi

          echo "Checking /etc/os-release in $IMAGE_ID"
          docker run --rm --entrypoint sh "$IMAGE_ID" -c \
            'source /etc/os-release && echo "$ID"' \
            | grep -qE '^(rhel|ubi)$' \
            || { echo "Base image is not UBI 9!"; exit 1; }
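Run standalone, the "Generate Template List" pipeline behaves as sketched below; the template names in the sample output are assumptions, since the actual set depends on the checked-out tree.

```sh
# Each */build.yaml path is reduced to its parent directory name,
# then jq packs the names into a compact JSON array for the job matrix.
ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' \
  | jq -R -s -c 'split("\n")[:-1]'
# Example output (assumed template set):
# ["fireworks","kvant","ollama","starter","together"]
```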
.github/workflows/semantic-pr.yml (vendored, deleted)
@@ -1,25 +0,0 @@
name: Check semantic PR titles

on:
  pull_request_target:
    types:
      - opened
      - edited
      - reopened
      - synchronize

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  title-check:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR Title's semantic conformance
        uses: amannn/action-semantic-pull-request@0723387faaf9b38adef4775cd42cfd5155ed6017 # v5.5.3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/stale_bot.yml (vendored, deleted)
@@ -1,45 +0,0 @@
name: Close stale issues and PRs

on:
  schedule:
    - cron: '0 0 * * *' # every day at midnight

env:
  LC_ALL: en_US.UTF-8

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  stale:
    permissions:
      issues: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - name: Stale Action
        uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
        with:
          stale-issue-label: 'stale'
          stale-issue-message: >
            This issue has been automatically marked as stale because it has not had activity within 60 days.
            It will be automatically closed if no further activity occurs within 30 days.
          close-issue-message: >
            This issue has been automatically closed due to inactivity.
            Please feel free to reopen if you feel it is still relevant!
          days-before-issue-stale: 60
          days-before-issue-close: 30
          stale-pr-label: 'stale'
          stale-pr-message: >
            This pull request has been automatically marked as stale because it has not had activity within 60 days.
            It will be automatically closed if no further activity occurs within 30 days.
          close-pr-message: >
            This pull request has been automatically closed due to inactivity.
            Please feel free to reopen if you intend to continue working on it!
          days-before-pr-stale: 60
          days-before-pr-close: 30
          operations-per-run: 300
.github/workflows/test-external-providers.yml (vendored, deleted)
@@ -1,71 +0,0 @@
name: Test External Providers

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/test-external-providers.yml' # This workflow

jobs:
  test-external-providers:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        image-type: [venv]
        # We don't do container yet, it's tricky to install a package from the host into the
        # container and point 'uv pip install' to the correct path...
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Apply image type to config file
        run: |
          yq -i '.image_type = "${{ matrix.image-type }}"' tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
          cat tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

      - name: Setup directory for Ollama custom provider
        run: |
          mkdir -p tests/external-provider/llama-stack-provider-ollama/src/
          cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama

      - name: Create provider configuration
        run: |
          mkdir -p /home/runner/.llama/providers.d/remote/inference
          cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml

      - name: Build distro from config file
        run: |
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          uv run pip list
          nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          for i in {1..30}; do
            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
              echo "Waiting for Llama Stack server to load the provider..."
              sleep 1
            else
              echo "Provider loaded"
              exit 0
            fi
          done
          echo "Provider failed to load"
          cat server.log
          exit 1
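After the two setup steps above, the runner is left with an external-provider layout like this sketch, reconstructed from the paths in the workflow:

```sh
# Provider spec consumed at 'llama stack build' / 'llama stack run' time:
ls -R /home/runner/.llama/providers.d
# /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml

# Provider source copied from the in-tree ollama adapter:
ls tests/external-provider/llama-stack-provider-ollama/src/
# llama_stack_provider_ollama
```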
.github/workflows/tests.yml (vendored, deleted)
@@ -1,69 +0,0 @@
name: auto-tests

on:
  # pull_request:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Specific Commit SHA to trigger on'
        required: false
        default: $GITHUB_SHA # default to the last commit of $GITHUB_REF branch

jobs:
  test-llama-stack-as-library:
    runs-on: ubuntu-latest
    env:
      TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
      FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
      TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
    strategy:
      matrix:
        provider: [fireworks, together]
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ github.event.inputs.commit_sha }}

      - name: Echo commit SHA
        run: |
          echo "Triggered on commit SHA: ${{ github.event.inputs.commit_sha }}"
          git rev-parse HEAD

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt pytest
          pip install -e .

      - name: Build providers
        run: |
          llama stack build --template ${{ matrix.provider }} --image-type venv

      - name: Install the latest llama-stack-client & llama-models packages
        run: |
          pip install -e git+https://github.com/meta-llama/llama-stack-client-python.git#egg=llama-stack-client
          pip install -e git+https://github.com/meta-llama/llama-models.git#egg=llama-models

      - name: Run client-sdk test
        working-directory: "${{ github.workspace }}"
        env:
          REPORT_OUTPUT: md_report.md
        shell: bash
        run: |
          pip install --upgrade pytest-md-report
          echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"

          export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
          LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"

      - name: Output reports to the job summary
        if: always()
        shell: bash
        run: |
          if [ -f "$REPORT_FILE" ]; then
            echo "<details><summary> Test Report for ${{ matrix.provider }} </summary>" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            cat "$REPORT_FILE" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "</details>" >> $GITHUB_STEP_SUMMARY
          fi
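The client-sdk step can be reproduced outside CI with the same environment variables; this sketch picks fireworks, one of the two matrix providers, and assumes the repo is installed as above.

```sh
pip install --upgrade pytest-md-report
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
LLAMA_STACK_CONFIG=./llama_stack/templates/fireworks/run.yaml \
  pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ \
  --md-report-output md_report.md
```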
.github/workflows/unit-tests.yml (vendored, deleted)
@@ -1,52 +0,0 @@
name: Unit Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/unit/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/unit-tests.yml' # This workflow
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Run unit tests
        run: |
          PYTHON_VERSION=${{ matrix.python }} ./scripts/unit-tests.sh --cov=llama_stack --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }}

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: test-results-${{ matrix.python }}
          path: |
            .pytest_cache/
            pytest-report-${{ matrix.python }}.xml
            htmlcov-${{ matrix.python }}/
          retention-days: 7
.github/workflows/update-readthedocs.yml (vendored, deleted)
@@ -1,68 +0,0 @@
name: Update ReadTheDocs

on:
  workflow_dispatch:
    inputs:
      branch:
        description: 'RTD version to update'
        required: false
        default: 'latest'
  push:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'pyproject.toml'
      - '.github/workflows/update-readthedocs.yml'
    tags:
      - '*'
  pull_request:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'pyproject.toml'
      - '.github/workflows/update-readthedocs.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  update-readthedocs:
    runs-on: ubuntu-latest
    env:
      TOKEN: ${{ secrets.READTHEDOCS_TOKEN }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build HTML
        run: |
          cd docs
          uv run make html

      - name: Trigger ReadTheDocs build
        if: github.event_name != 'pull_request'
        run: |
          if [ -z "$TOKEN" ]; then
            echo "READTHEDOCS_TOKEN is not set"
            exit 1
          fi

          response=$(curl -X POST \
            -H "Content-Type: application/json" \
            -d "{
              \"token\": \"$TOKEN\",
              \"version\": \"$GITHUB_REF_NAME\"
            }" \
            https://readthedocs.org/api/v2/webhook/llama-stack/289768/)

          echo "Response: $response"
          if [ $(echo $response | jq -r '.build_triggered') != 'true' ]; then
            echo "Failed to trigger ReadTheDocs build"
            exit 1
          fi
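The trigger step amounts to a single webhook call; done by hand it looks like the sketch below, where the token and version are placeholders (the workflow passes $GITHUB_REF_NAME as the version) and the endpoint URL is taken verbatim from the workflow.

```sh
curl -X POST \
  -H "Content-Type: application/json" \
  -d '{"token": "<READTHEDOCS_TOKEN>", "version": "latest"}' \
  https://readthedocs.org/api/v2/webhook/llama-stack/289768/
# A successful response carries "build_triggered": true, which the
# workflow checks with jq.
```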